Commit 9297872d authored by unknown

auto-ROLLBACK if binlog was not closed properly

auto-commit on Xid_log_event


client/mysqlbinlog.cc:
  auto-ROLLBACK if binlog was not closed properly.
mysql-test/r/ctype_ucs.result:
  results updated
mysql-test/r/mix_innodb_myisam_binlog.result:
  results updated
mysql-test/r/mysqlbinlog2.result:
  results updated
mysql-test/r/rpl_relayrotate.result:
  results updated
mysql-test/r/user_var.result:
  results updated
mysql-test/t/ctype_ucs.test:
  finalize binlog before calling mysqlbinlog
mysql-test/t/user_var.test:
  finalize binlog before calling mysqlbinlog
sql/log_event.cc:
  commit at Xid_log_event
  comments edited
sql/mysqld.cc:
  free(0) fixed
sql/slave.cc:
  rollback at fake Rotate_log_event
sql/sql_class.h:
  no commit_or_rollback argument for binlog->write(THD *thd, IO_CACHE *cache)
sql/log.cc:
  don't write "COMMIT" query, Xid_log_event is enough
sql/log_event.h:
  more comments for LOG_EVENT_BINLOG_IN_USE_F
  LOG_EVENT_FORCE_ROLLBACK_F added
sql/sql_repl.cc:
  rollback at Rotate_log_event.
  don't consider binlog corrupted if it was open when we read Format_description but closed when we got to the end
sql/sql_repl.h:
  style fix
parent be255d65
...@@ -1086,7 +1086,7 @@ at offset %lu ; this could be a log format error or read error", ...@@ -1086,7 +1086,7 @@ at offset %lu ; this could be a log format error or read error",
/* EOF can't be hit here normally, so it's a real error */ /* EOF can't be hit here normally, so it's a real error */
die("Could not read a Rotate_log_event event \ die("Could not read a Rotate_log_event event \
at offset %lu ; this could be a log format error or read error", at offset %lu ; this could be a log format error or read error",
tmp_pos); tmp_pos);
} }
else else
break; break;
...@@ -1157,9 +1157,16 @@ static int dump_local_log_entries(const char* logname) ...@@ -1157,9 +1157,16 @@ static int dump_local_log_entries(const char* logname)
Log_event* ev = Log_event::read_log_event(file, description_event); Log_event* ev = Log_event::read_log_event(file, description_event);
if (!ev) if (!ev)
{ {
if (file->error) /*
If the binlog wasn't closed properly (the "in use" flag is set), don't
complain about corruption, but issue a "ROLLBACK" to annihilate the
half-logged transaction. Otherwise, treat it as EOF and move to the next binlog.
*/
if (description_event->flags & LOG_EVENT_BINLOG_IN_USE_F)
fprintf(result_file, "ROLLBACK;\n");
else if (file->error)
{ {
fprintf(stderr, fprintf(stderr,
"Could not read entry at offset %s:" "Could not read entry at offset %s:"
"Error in log format or read error\n", "Error in log format or read error\n",
llstr(old_off,llbuff)); llstr(old_off,llbuff));
......
...@@ -527,6 +527,7 @@ show binlog events from 96; ...@@ -527,6 +527,7 @@ show binlog events from 96;
Log_name Pos Event_type Server_id End_log_pos Info Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 96 User var 1 136 @`v`=_ucs2 0x006100620063 COLLATE ucs2_general_ci master-bin.000001 96 User var 1 136 @`v`=_ucs2 0x006100620063 COLLATE ucs2_general_ci
master-bin.000001 136 Query 1 219 use `test`; insert into t2 values (@v) master-bin.000001 136 Query 1 219 use `test`; insert into t2 values (@v)
flush logs;
/*!40019 SET @@session.max_insert_delayed_threads=0*/; /*!40019 SET @@session.max_insert_delayed_threads=0*/;
SET @`v`:=_ucs2 0x006100620063 COLLATE ucs2_general_ci; SET @`v`:=_ucs2 0x006100620063 COLLATE ucs2_general_ci;
use test; use test;
......
...@@ -11,8 +11,7 @@ Log_name Pos Event_type Server_id End_log_pos Info ...@@ -11,8 +11,7 @@ Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 96 Query 1 # use `test`; BEGIN master-bin.000001 96 Query 1 # use `test`; BEGIN
master-bin.000001 158 Query 1 # use `test`; insert into t1 values(1) master-bin.000001 158 Query 1 # use `test`; insert into t1 values(1)
master-bin.000001 239 Query 1 # use `test`; insert into t2 select * from t1 master-bin.000001 239 Query 1 # use `test`; insert into t2 select * from t1
master-bin.000001 327 Xid 1 # xid=7 master-bin.000001 327 Xid 1 # COMMIT /* xid=7 */
master-bin.000001 354 Query 1 # use `test`; COMMIT
delete from t1; delete from t1;
delete from t2; delete from t2;
reset master; reset master;
...@@ -48,8 +47,7 @@ master-bin.000001 239 Query 1 # use `test`; savepoint my_savepoint ...@@ -48,8 +47,7 @@ master-bin.000001 239 Query 1 # use `test`; savepoint my_savepoint
master-bin.000001 318 Query 1 # use `test`; insert into t1 values(4) master-bin.000001 318 Query 1 # use `test`; insert into t1 values(4)
master-bin.000001 399 Query 1 # use `test`; insert into t2 select * from t1 master-bin.000001 399 Query 1 # use `test`; insert into t2 select * from t1
master-bin.000001 487 Query 1 # use `test`; rollback to savepoint my_savepoint master-bin.000001 487 Query 1 # use `test`; rollback to savepoint my_savepoint
master-bin.000001 578 Xid 1 # xid=24 master-bin.000001 578 Xid 1 # COMMIT /* xid=24 */
master-bin.000001 605 Query 1 # use `test`; COMMIT
delete from t1; delete from t1;
delete from t2; delete from t2;
reset master; reset master;
...@@ -76,8 +74,7 @@ master-bin.000001 318 Query 1 # use `test`; insert into t1 values(6) ...@@ -76,8 +74,7 @@ master-bin.000001 318 Query 1 # use `test`; insert into t1 values(6)
master-bin.000001 399 Query 1 # use `test`; insert into t2 select * from t1 master-bin.000001 399 Query 1 # use `test`; insert into t2 select * from t1
master-bin.000001 487 Query 1 # use `test`; rollback to savepoint my_savepoint master-bin.000001 487 Query 1 # use `test`; rollback to savepoint my_savepoint
master-bin.000001 578 Query 1 # use `test`; insert into t1 values(7) master-bin.000001 578 Query 1 # use `test`; insert into t1 values(7)
master-bin.000001 659 Xid 1 # xid=36 master-bin.000001 659 Xid 1 # COMMIT /* xid=36 */
master-bin.000001 686 Query 1 # use `test`; COMMIT
delete from t1; delete from t1;
delete from t2; delete from t2;
reset master; reset master;
...@@ -106,9 +103,8 @@ show binlog events from 96; ...@@ -106,9 +103,8 @@ show binlog events from 96;
Log_name Pos Event_type Server_id End_log_pos Info Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 96 Query 1 # use `test`; BEGIN master-bin.000001 96 Query 1 # use `test`; BEGIN
master-bin.000001 158 Query 1 # use `test`; insert into t1 values(9) master-bin.000001 158 Query 1 # use `test`; insert into t1 values(9)
master-bin.000001 239 Xid 1 # xid=59 master-bin.000001 239 Xid 1 # COMMIT /* xid=59 */
master-bin.000001 266 Query 1 # use `test`; COMMIT master-bin.000001 266 Query 1 # use `test`; insert into t2 select * from t1
master-bin.000001 329 Query 1 # use `test`; insert into t2 select * from t1
delete from t1; delete from t1;
delete from t2; delete from t2;
reset master; reset master;
...@@ -119,22 +115,19 @@ show binlog events from 96; ...@@ -119,22 +115,19 @@ show binlog events from 96;
Log_name Pos Event_type Server_id End_log_pos Info Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 96 Query 1 # use `test`; BEGIN master-bin.000001 96 Query 1 # use `test`; BEGIN
master-bin.000001 158 Query 1 # use `test`; insert into t1 values(10) master-bin.000001 158 Query 1 # use `test`; insert into t1 values(10)
master-bin.000001 240 Xid 1 # xid=65 master-bin.000001 240 Xid 1 # COMMIT /* xid=65 */
master-bin.000001 267 Query 1 # use `test`; COMMIT master-bin.000001 267 Query 1 # use `test`; insert into t2 select * from t1
master-bin.000001 330 Query 1 # use `test`; insert into t2 select * from t1
insert into t1 values(11); insert into t1 values(11);
commit; commit;
show binlog events from 96; show binlog events from 96;
Log_name Pos Event_type Server_id End_log_pos Info Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 96 Query 1 # use `test`; BEGIN master-bin.000001 96 Query 1 # use `test`; BEGIN
master-bin.000001 158 Query 1 # use `test`; insert into t1 values(10) master-bin.000001 158 Query 1 # use `test`; insert into t1 values(10)
master-bin.000001 240 Xid 1 # xid=65 master-bin.000001 240 Xid 1 # COMMIT /* xid=65 */
master-bin.000001 267 Query 1 # use `test`; COMMIT master-bin.000001 267 Query 1 # use `test`; insert into t2 select * from t1
master-bin.000001 330 Query 1 # use `test`; insert into t2 select * from t1 master-bin.000001 355 Query 1 # use `test`; BEGIN
master-bin.000001 418 Query 1 # use `test`; BEGIN master-bin.000001 417 Query 1 # use `test`; insert into t1 values(11)
master-bin.000001 480 Query 1 # use `test`; insert into t1 values(11) master-bin.000001 499 Xid 1 # COMMIT /* xid=67 */
master-bin.000001 562 Xid 1 # xid=67
master-bin.000001 589 Query 1 # use `test`; COMMIT
alter table t2 engine=INNODB; alter table t2 engine=INNODB;
delete from t1; delete from t1;
delete from t2; delete from t2;
...@@ -148,8 +141,7 @@ Log_name Pos Event_type Server_id End_log_pos Info ...@@ -148,8 +141,7 @@ Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 96 Query 1 # use `test`; BEGIN master-bin.000001 96 Query 1 # use `test`; BEGIN
master-bin.000001 158 Query 1 # use `test`; insert into t1 values(12) master-bin.000001 158 Query 1 # use `test`; insert into t1 values(12)
master-bin.000001 240 Query 1 # use `test`; insert into t2 select * from t1 master-bin.000001 240 Query 1 # use `test`; insert into t2 select * from t1
master-bin.000001 328 Xid 1 # xid=77 master-bin.000001 328 Xid 1 # COMMIT /* xid=77 */
master-bin.000001 355 Query 1 # use `test`; COMMIT
delete from t1; delete from t1;
delete from t2; delete from t2;
reset master; reset master;
...@@ -173,8 +165,7 @@ show binlog events from 96; ...@@ -173,8 +165,7 @@ show binlog events from 96;
Log_name Pos Event_type Server_id End_log_pos Info Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 96 Query 1 # use `test`; BEGIN master-bin.000001 96 Query 1 # use `test`; BEGIN
master-bin.000001 158 Query 1 # use `test`; insert into t1 values(14) master-bin.000001 158 Query 1 # use `test`; insert into t1 values(14)
master-bin.000001 240 Xid 1 # xid=93 master-bin.000001 240 Xid 1 # COMMIT /* xid=93 */
master-bin.000001 267 Query 1 # use `test`; COMMIT
delete from t1; delete from t1;
delete from t2; delete from t2;
reset master; reset master;
...@@ -195,8 +186,7 @@ Log_name Pos Event_type Server_id End_log_pos Info ...@@ -195,8 +186,7 @@ Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 96 Query 1 # use `test`; BEGIN master-bin.000001 96 Query 1 # use `test`; BEGIN
master-bin.000001 158 Query 1 # use `test`; insert into t1 values(16) master-bin.000001 158 Query 1 # use `test`; insert into t1 values(16)
master-bin.000001 240 Query 1 # use `test`; insert into t1 values(18) master-bin.000001 240 Query 1 # use `test`; insert into t1 values(18)
master-bin.000001 322 Xid 1 # xid=104 master-bin.000001 322 Xid 1 # COMMIT /* xid=104 */
master-bin.000001 349 Query 1 # use `test`; COMMIT
delete from t1; delete from t1;
delete from t2; delete from t2;
alter table t2 type=MyISAM; alter table t2 type=MyISAM;
......
...@@ -144,6 +144,7 @@ SET TIMESTAMP=1579609943; ...@@ -144,6 +144,7 @@ SET TIMESTAMP=1579609943;
SET @@session.foreign_key_checks=1, @@session.sql_auto_is_null=1, @@session.unique_checks=1; SET @@session.foreign_key_checks=1, @@session.sql_auto_is_null=1, @@session.unique_checks=1;
SET @@session.sql_mode=0; SET @@session.sql_mode=0;
insert into t1 values(null, "f"); insert into t1 values(null, "f");
ROLLBACK;
--- offset -- --- offset --
/*!40019 SET @@session.max_insert_delayed_threads=0*/; /*!40019 SET @@session.max_insert_delayed_threads=0*/;
...@@ -171,6 +172,7 @@ SET TIMESTAMP=1579609943; ...@@ -171,6 +172,7 @@ SET TIMESTAMP=1579609943;
SET @@session.foreign_key_checks=1, @@session.sql_auto_is_null=1, @@session.unique_checks=1; SET @@session.foreign_key_checks=1, @@session.sql_auto_is_null=1, @@session.unique_checks=1;
SET @@session.sql_mode=0; SET @@session.sql_mode=0;
insert into t1 values(null, "f"); insert into t1 values(null, "f");
ROLLBACK;
--- start-position -- --- start-position --
/*!40019 SET @@session.max_insert_delayed_threads=0*/; /*!40019 SET @@session.max_insert_delayed_threads=0*/;
...@@ -188,6 +190,7 @@ SET TIMESTAMP=1579609943; ...@@ -188,6 +190,7 @@ SET TIMESTAMP=1579609943;
SET @@session.foreign_key_checks=1, @@session.sql_auto_is_null=1, @@session.unique_checks=1; SET @@session.foreign_key_checks=1, @@session.sql_auto_is_null=1, @@session.unique_checks=1;
SET @@session.sql_mode=0; SET @@session.sql_mode=0;
insert into t1 values(null, "f"); insert into t1 values(null, "f");
ROLLBACK;
--- stop-position -- --- stop-position --
/*!40019 SET @@session.max_insert_delayed_threads=0*/; /*!40019 SET @@session.max_insert_delayed_threads=0*/;
...@@ -233,6 +236,7 @@ SET TIMESTAMP=1579609943; ...@@ -233,6 +236,7 @@ SET TIMESTAMP=1579609943;
SET @@session.foreign_key_checks=1, @@session.sql_auto_is_null=1, @@session.unique_checks=1; SET @@session.foreign_key_checks=1, @@session.sql_auto_is_null=1, @@session.unique_checks=1;
SET @@session.sql_mode=0; SET @@session.sql_mode=0;
insert into t1 values(null, "f"); insert into t1 values(null, "f");
ROLLBACK;
--- stop-datetime -- --- stop-datetime --
/*!40019 SET @@session.max_insert_delayed_threads=0*/; /*!40019 SET @@session.max_insert_delayed_threads=0*/;
......
...@@ -18,5 +18,5 @@ max(a) ...@@ -18,5 +18,5 @@ max(a)
8000 8000
show slave status; show slave status;
Slave_IO_State Master_Host Master_User Master_Port Connect_Retry Master_Log_File Read_Master_Log_Pos Relay_Log_File Relay_Log_Pos Relay_Master_Log_File Slave_IO_Running Slave_SQL_Running Replicate_Do_DB Replicate_Ignore_DB Replicate_Do_Table Replicate_Ignore_Table Replicate_Wild_Do_Table Replicate_Wild_Ignore_Table Last_Errno Last_Error Skip_Counter Exec_Master_Log_Pos Relay_Log_Space Until_Condition Until_Log_File Until_Log_Pos Master_SSL_Allowed Master_SSL_CA_File Master_SSL_CA_Path Master_SSL_Cert Master_SSL_Cipher Master_SSL_Key Seconds_Behind_Master Slave_IO_State Master_Host Master_User Master_Port Connect_Retry Master_Log_File Read_Master_Log_Pos Relay_Log_File Relay_Log_Pos Relay_Master_Log_File Slave_IO_Running Slave_SQL_Running Replicate_Do_DB Replicate_Ignore_DB Replicate_Do_Table Replicate_Ignore_Table Replicate_Wild_Do_Table Replicate_Wild_Ignore_Table Last_Errno Last_Error Skip_Counter Exec_Master_Log_Pos Relay_Log_Space Until_Condition Until_Log_File Until_Log_Pos Master_SSL_Allowed Master_SSL_CA_File Master_SSL_CA_Path Master_SSL_Cert Master_SSL_Cipher Master_SSL_Key Seconds_Behind_Master
# 127.0.0.1 root MASTER_MYPORT 1 master-bin.000001 687235 # # master-bin.000001 Yes Yes 0 0 687235 # None 0 No # # 127.0.0.1 root MASTER_MYPORT 1 master-bin.000001 687172 # # master-bin.000001 Yes Yes 0 0 687172 # None 0 No #
drop table t1; drop table t1;
...@@ -179,6 +179,7 @@ master-bin.000001 96 User var 1 137 @`a b`=_latin1 0x68656C6C6F COLLATE latin1_s ...@@ -179,6 +179,7 @@ master-bin.000001 96 User var 1 137 @`a b`=_latin1 0x68656C6C6F COLLATE latin1_s
master-bin.000001 137 Query 1 223 use `test`; INSERT INTO t1 VALUES(@`a b`) master-bin.000001 137 Query 1 223 use `test`; INSERT INTO t1 VALUES(@`a b`)
master-bin.000001 223 User var 1 265 @`var1`=_latin1 0x273B616161 COLLATE latin1_swedish_ci master-bin.000001 223 User var 1 265 @`var1`=_latin1 0x273B616161 COLLATE latin1_swedish_ci
master-bin.000001 265 Query 1 351 use `test`; insert into t1 values (@var1) master-bin.000001 265 Query 1 351 use `test`; insert into t1 values (@var1)
flush logs;
/*!40019 SET @@session.max_insert_delayed_threads=0*/; /*!40019 SET @@session.max_insert_delayed_threads=0*/;
SET @`a b`:=_latin1 0x68656C6C6F COLLATE latin1_swedish_ci; SET @`a b`:=_latin1 0x68656C6C6F COLLATE latin1_swedish_ci;
use test; use test;
......
...@@ -339,6 +339,7 @@ set @v=convert('abc' using ucs2); ...@@ -339,6 +339,7 @@ set @v=convert('abc' using ucs2);
reset master; reset master;
insert into t2 values (@v); insert into t2 values (@v);
show binlog events from 96; show binlog events from 96;
flush logs;
# more important than SHOW BINLOG EVENTS, mysqlbinlog (where we # more important than SHOW BINLOG EVENTS, mysqlbinlog (where we
# absolutely need variables names to be quoted and strings to be # absolutely need variables names to be quoted and strings to be
# escaped). # escaped).
......
...@@ -110,6 +110,7 @@ INSERT INTO t1 VALUES(@`a b`); ...@@ -110,6 +110,7 @@ INSERT INTO t1 VALUES(@`a b`);
set @var1= "';aaa"; set @var1= "';aaa";
insert into t1 values (@var1); insert into t1 values (@var1);
show binlog events from 96; show binlog events from 96;
flush logs;
# more important than SHOW BINLOG EVENTS, mysqlbinlog (where we # more important than SHOW BINLOG EVENTS, mysqlbinlog (where we
# absolutely need variables names to be quoted and strings to be # absolutely need variables names to be quoted and strings to be
# escaped). # escaped).
......
...@@ -119,7 +119,7 @@ static int binlog_commit(THD *thd, bool all) ...@@ -119,7 +119,7 @@ static int binlog_commit(THD *thd, bool all)
} }
/* Update the binary log as we have cached some queries */ /* Update the binary log as we have cached some queries */
error= mysql_bin_log.write(thd, trans_log, 1); error= mysql_bin_log.write(thd, trans_log);
binlog_cleanup_trans(trans_log); binlog_cleanup_trans(trans_log);
DBUG_RETURN(error); DBUG_RETURN(error);
} }
...@@ -142,7 +142,11 @@ static int binlog_rollback(THD *thd, bool all) ...@@ -142,7 +142,11 @@ static int binlog_rollback(THD *thd, bool all)
non-transactional table inside a transaction...) non-transactional table inside a transaction...)
*/ */
if (unlikely(thd->options & OPTION_STATUS_NO_TRANS_UPDATE)) if (unlikely(thd->options & OPTION_STATUS_NO_TRANS_UPDATE))
error= mysql_bin_log.write(thd, trans_log, 0); {
Query_log_event qev(thd, "ROLLBACK", 8, TRUE, FALSE);
qev.write(trans_log);
error= mysql_bin_log.write(thd, trans_log);
}
binlog_cleanup_trans(trans_log); binlog_cleanup_trans(trans_log);
DBUG_RETURN(error); DBUG_RETURN(error);
} }
...@@ -425,7 +429,6 @@ const char *MYSQL_LOG::generate_name(const char *log_name, ...@@ -425,7 +429,6 @@ const char *MYSQL_LOG::generate_name(const char *log_name,
const char *suffix, const char *suffix,
bool strip_ext, char *buff) bool strip_ext, char *buff)
{ {
DBUG_ASSERT(!strip_ext || (log_name && log_name[0]));
if (!log_name || !log_name[0]) if (!log_name || !log_name[0])
{ {
/* /*
...@@ -611,6 +614,7 @@ bool MYSQL_LOG::open(const char *log_name, ...@@ -611,6 +614,7 @@ bool MYSQL_LOG::open(const char *log_name,
even if this is not the very first binlog. even if this is not the very first binlog.
*/ */
Format_description_log_event s(BINLOG_VERSION); Format_description_log_event s(BINLOG_VERSION);
s.flags|= LOG_EVENT_BINLOG_IN_USE_F;
if (!s.is_valid()) if (!s.is_valid())
goto err; goto err;
if (null_created_arg) if (null_created_arg)
...@@ -1779,8 +1783,6 @@ uint MYSQL_LOG::next_file_id() ...@@ -1779,8 +1783,6 @@ uint MYSQL_LOG::next_file_id()
write() write()
thd thd
cache The cache to copy to the binlog cache The cache to copy to the binlog
is_commit If true, will write "COMMIT" in the end, if false will
write "ROLLBACK".
NOTE NOTE
- We only come here if there is something in the cache. - We only come here if there is something in the cache.
...@@ -1799,7 +1801,7 @@ uint MYSQL_LOG::next_file_id() ...@@ -1799,7 +1801,7 @@ uint MYSQL_LOG::next_file_id()
same updates are run on the slave. same updates are run on the slave.
*/ */
bool MYSQL_LOG::write(THD *thd, IO_CACHE *cache, bool is_commit) bool MYSQL_LOG::write(THD *thd, IO_CACHE *cache)
{ {
VOID(pthread_mutex_lock(&LOCK_log)); VOID(pthread_mutex_lock(&LOCK_log));
DBUG_ENTER("MYSQL_LOG::write(cache"); DBUG_ENTER("MYSQL_LOG::write(cache");
...@@ -1809,18 +1811,10 @@ bool MYSQL_LOG::write(THD *thd, IO_CACHE *cache, bool is_commit) ...@@ -1809,18 +1811,10 @@ bool MYSQL_LOG::write(THD *thd, IO_CACHE *cache, bool is_commit)
uint length; uint length;
/* /*
Add the "BEGIN" and "COMMIT" in the binlog around transactions Log "BEGIN" at the beginning of the transaction.
which may contain more than 1 SQL statement. If we run with which may contain more than 1 SQL statement.
AUTOCOMMIT=1, then MySQL immediately writes each SQL statement to There is no need to append "COMMIT", as it's already in the 'cache'
the binlog when the statement has been completed. No need to add (in fact, Xid_log_event is there which does the commit on slaves)
"BEGIN" ... "COMMIT" around such statements. Otherwise, MySQL uses
trans_log (that is thd->ha_data[binlog_hton.slot]) to cache
the SQL statements until the explicit commit, and at the commit writes
the contents in trans_log to the binlog.
We write the "BEGIN" mark first in the buffer (trans_log) where we
store the SQL statements for a transaction. At the transaction commit
we will add the "COMMIT mark and write the buffer to the binlog.
*/ */
{ {
Query_log_event qinfo(thd, "BEGIN", 5, TRUE, FALSE); Query_log_event qinfo(thd, "BEGIN", 5, TRUE, FALSE);
...@@ -1846,6 +1840,7 @@ bool MYSQL_LOG::write(THD *thd, IO_CACHE *cache, bool is_commit) ...@@ -1846,6 +1840,7 @@ bool MYSQL_LOG::write(THD *thd, IO_CACHE *cache, bool is_commit)
if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0)) if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
goto err; goto err;
length=my_b_bytes_in_cache(cache); length=my_b_bytes_in_cache(cache);
DBUG_EXECUTE_IF("half_binlogged_transaction", length-=100;);
do do
{ {
/* Write data to the binary log file */ /* Write data to the binary log file */
...@@ -1854,21 +1849,9 @@ bool MYSQL_LOG::write(THD *thd, IO_CACHE *cache, bool is_commit) ...@@ -1854,21 +1849,9 @@ bool MYSQL_LOG::write(THD *thd, IO_CACHE *cache, bool is_commit)
cache->read_pos=cache->read_end; // Mark buffer used up cache->read_pos=cache->read_end; // Mark buffer used up
} while ((length=my_b_fill(cache))); } while ((length=my_b_fill(cache)));
/* if (flush_io_cache(&log_file) || sync_binlog(&log_file))
We write the command "COMMIT" as the last SQL command in the
binlog segment cached for this transaction
*/
{
Query_log_event qinfo(thd,
is_commit ? "COMMIT" : "ROLLBACK",
is_commit ? 6 : 8,
TRUE, FALSE);
qinfo.error_code= 0;
if (qinfo.write(&log_file) || flush_io_cache(&log_file) ||
sync_binlog(&log_file))
goto err; goto err;
} DBUG_EXECUTE_IF("half_binlogged_transaction", abort(););
if (cache->error) // Error on read if (cache->error) // Error on read
{ {
sql_print_error(ER(ER_ERROR_ON_READ), cache->file_name, errno); sql_print_error(ER(ER_ERROR_ON_READ), cache->file_name, errno);
...@@ -2093,10 +2076,10 @@ void MYSQL_LOG::close(uint exiting) ...@@ -2093,10 +2076,10 @@ void MYSQL_LOG::close(uint exiting)
end_io_cache(&log_file); end_io_cache(&log_file);
/* don't pwrite in a file opened with O_APPEND - it doesn't work */ /* don't pwrite in a file opened with O_APPEND - it doesn't work */
if (log_file.type == WRITE_CACHE) if (log_file.type == WRITE_CACHE && log_type == LOG_BIN)
{ {
my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET; my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET;
char flags=LOG_EVENT_BINLOG_CLOSED_F; char flags=0; // clearing LOG_EVENT_BINLOG_IN_USE_F
my_pwrite(log_file.file, &flags, 1, offset, MYF(0)); my_pwrite(log_file.file, &flags, 1, offset, MYF(0));
} }
...@@ -2944,15 +2927,12 @@ int TC_LOG_BINLOG::open(const char *opt_name) ...@@ -2944,15 +2927,12 @@ int TC_LOG_BINLOG::open(const char *opt_name)
goto err; goto err;
} }
if (((ev= Log_event::read_log_event(&log, 0, &fdle))) && if ((ev= Log_event::read_log_event(&log, 0, &fdle)) &&
(ev->get_type_code() == FORMAT_DESCRIPTION_EVENT)) ev->get_type_code() == FORMAT_DESCRIPTION_EVENT &&
{ ev->flags & LOG_EVENT_BINLOG_IN_USE_F)
if (ev->flags & LOG_EVENT_BINLOG_CLOSED_F)
error=0;
else
error= recover(&log, (Format_description_log_event *)ev); error= recover(&log, (Format_description_log_event *)ev);
} else
// else nothing to do (probably MySQL 4.x binlog) error=0;
delete ev; delete ev;
end_io_cache(&log); end_io_cache(&log);
...@@ -3009,6 +2989,8 @@ int TC_LOG_BINLOG::recover(IO_CACHE *log, Format_description_log_event *fdle) ...@@ -3009,6 +2989,8 @@ int TC_LOG_BINLOG::recover(IO_CACHE *log, Format_description_log_event *fdle)
init_alloc_root(&mem_root, tc_log_page_size, tc_log_page_size); init_alloc_root(&mem_root, tc_log_page_size, tc_log_page_size);
fdle->flags&= ~LOG_EVENT_BINLOG_IN_USE_F; // abort on the first error
while ((ev= Log_event::read_log_event(log,0,fdle)) && ev->is_valid()) while ((ev= Log_event::read_log_event(log,0,fdle)) && ev->is_valid())
{ {
if (ev->get_type_code() == XID_EVENT) if (ev->get_type_code() == XID_EVENT)
......
...@@ -292,10 +292,10 @@ Log_event::Log_event(THD* thd_arg, uint16 flags_arg, bool using_trans) ...@@ -292,10 +292,10 @@ Log_event::Log_event(THD* thd_arg, uint16 flags_arg, bool using_trans)
/* /*
This minimal constructor is for when you are not even sure that there is a This minimal constructor is for when you are not even sure that there
valid THD. For example in the server when we are shutting down or flushing is a valid THD. For example in the server when we are shutting down or
logs after receiving a SIGHUP (then we must write a Rotate to the binlog but flushing logs after receiving a SIGHUP (then we must write a Rotate to
we have no THD, so we need this minimal constructor). the binlog but we have no THD, so we need this minimal constructor).
*/ */
Log_event::Log_event() Log_event::Log_event()
...@@ -331,14 +331,14 @@ Log_event::Log_event(const char* buf, ...@@ -331,14 +331,14 @@ Log_event::Log_event(const char* buf,
/* 4.0 or newer */ /* 4.0 or newer */
log_pos= uint4korr(buf + LOG_POS_OFFSET); log_pos= uint4korr(buf + LOG_POS_OFFSET);
/* /*
If the log is 4.0 (so here it can only be a 4.0 relay log read by the SQL If the log is 4.0 (so here it can only be a 4.0 relay log read by
thread or a 4.0 master binlog read by the I/O thread), log_pos is the the SQL thread or a 4.0 master binlog read by the I/O thread),
beginning of the event: we transform it into the end of the event, which is log_pos is the beginning of the event: we transform it into the end
more useful. of the event, which is more useful.
But how do you know that the log is 4.0: you know it if description_event But how do you know that the log is 4.0: you know it if
is version 3 *and* you are not reading a Format_desc (remember that description_event is version 3 *and* you are not reading a
mysqlbinlog starts by assuming that 5.0 logs are in 4.0 format, until it Format_desc (remember that mysqlbinlog starts by assuming that 5.0
finds a Format_desc). logs are in 4.0 format, until it finds a Format_desc).
*/ */
if (description_event->binlog_version==3 && if (description_event->binlog_version==3 &&
buf[EVENT_TYPE_OFFSET]<FORMAT_DESCRIPTION_EVENT && log_pos) buf[EVENT_TYPE_OFFSET]<FORMAT_DESCRIPTION_EVENT && log_pos)
...@@ -346,13 +346,13 @@ Log_event::Log_event(const char* buf, ...@@ -346,13 +346,13 @@ Log_event::Log_event(const char* buf,
/* /*
If log_pos=0, don't change it. log_pos==0 is a marker to mean If log_pos=0, don't change it. log_pos==0 is a marker to mean
"don't change rli->group_master_log_pos" (see "don't change rli->group_master_log_pos" (see
inc_group_relay_log_pos()). As it is unreal log_pos, adding the event inc_group_relay_log_pos()). As it is unreal log_pos, adding the
len's is nonsense. For example, a fake Rotate event should event len's is nonsense. For example, a fake Rotate event should
not have its log_pos (which is 0) changed or it will modify not have its log_pos (which is 0) changed or it will modify
Exec_master_log_pos in SHOW SLAVE STATUS, displaying a nonsense value Exec_master_log_pos in SHOW SLAVE STATUS, displaying a nonsense
of (a non-zero offset which does not exist in the master's binlog, so value of (a non-zero offset which does not exist in the master's
which will cause problems if the user uses this value in binlog, so which will cause problems if the user uses this value
CHANGE MASTER). in CHANGE MASTER).
*/ */
log_pos+= uint4korr(buf + EVENT_LEN_OFFSET); log_pos+= uint4korr(buf + EVENT_LEN_OFFSET);
} }
...@@ -363,16 +363,17 @@ Log_event::Log_event(const char* buf, ...@@ -363,16 +363,17 @@ Log_event::Log_event(const char* buf,
(buf[EVENT_TYPE_OFFSET] == ROTATE_EVENT)) (buf[EVENT_TYPE_OFFSET] == ROTATE_EVENT))
{ {
/* /*
These events always have a header which stops here (i.e. their header is These events always have a header which stops here (i.e. their
FROZEN). header is FROZEN).
*/ */
/* /*
Initialization to zero of all other Log_event members as they're not Initialization to zero of all other Log_event members as they're
specified. Currently there are no such members; in the future there will not specified. Currently there are no such members; in the future
be an event UID (but Format_description and Rotate don't need this UID, there will be an event UID (but Format_description and Rotate
as they are not propagated through --log-slave-updates (remember the UID don't need this UID, as they are not propagated through
is used to not play a query twice when you have two masters which are --log-slave-updates (remember the UID is used to not play a query
slaves of a 3rd master). Then we are done. twice when you have two masters which are slaves of a 3rd master).
Then we are done.
*/ */
return; return;
} }
...@@ -405,10 +406,10 @@ int Log_event::exec_event(struct st_relay_log_info* rli) ...@@ -405,10 +406,10 @@ int Log_event::exec_event(struct st_relay_log_info* rli)
if (rli) if (rli)
{ {
/* /*
If in a transaction, and if the slave supports transactions, If in a transaction, and if the slave supports transactions, just
just inc_event_relay_log_pos(). We only have to check for OPTION_BEGIN inc_event_relay_log_pos(). We only have to check for OPTION_BEGIN
(not OPTION_NOT_AUTOCOMMIT) as transactions are logged (not OPTION_NOT_AUTOCOMMIT) as transactions are logged with
with BEGIN/COMMIT, not with SET AUTOCOMMIT= . BEGIN/COMMIT, not with SET AUTOCOMMIT= .
CAUTION: opt_using_transactions means CAUTION: opt_using_transactions means
innodb || bdb ; suppose the master supports InnoDB and BDB, innodb || bdb ; suppose the master supports InnoDB and BDB,
...@@ -416,17 +417,18 @@ int Log_event::exec_event(struct st_relay_log_info* rli) ...@@ -416,17 +417,18 @@ int Log_event::exec_event(struct st_relay_log_info* rli)
will arise: will arise:
- suppose an InnoDB table is created on the master, - suppose an InnoDB table is created on the master,
- then it will be MyISAM on the slave - then it will be MyISAM on the slave
- but as opt_using_transactions is true, the slave will believe he is - but as opt_using_transactions is true, the slave will believe he
transactional with the MyISAM table. And problems will come when one is transactional with the MyISAM table. And problems will come
does START SLAVE; STOP SLAVE; START SLAVE; (the slave will resume at when one does START SLAVE; STOP SLAVE; START SLAVE; (the slave
BEGIN whereas there has not been any rollback). This is the problem of will resume at BEGIN whereas there has not been any rollback).
using opt_using_transactions instead of a finer This is the problem of using opt_using_transactions instead of a
"does the slave support _the_transactional_handler_used_on_the_master_". finer "does the slave support
_the_transactional_handler_used_on_the_master_".
More generally, we'll have problems when a query mixes a transactional
handler and MyISAM and STOP SLAVE is issued in the middle of the More generally, we'll have problems when a query mixes a
"transaction". START SLAVE will resume at BEGIN while the MyISAM table transactional handler and MyISAM and STOP SLAVE is issued in the
has already been updated. middle of the "transaction". START SLAVE will resume at BEGIN
while the MyISAM table has already been updated.
*/ */
if ((thd->options & OPTION_BEGIN) && opt_using_transactions) if ((thd->options & OPTION_BEGIN) && opt_using_transactions)
rli->inc_event_relay_log_pos(); rli->inc_event_relay_log_pos();
...@@ -435,8 +437,8 @@ int Log_event::exec_event(struct st_relay_log_info* rli) ...@@ -435,8 +437,8 @@ int Log_event::exec_event(struct st_relay_log_info* rli)
rli->inc_group_relay_log_pos(log_pos); rli->inc_group_relay_log_pos(log_pos);
flush_relay_log_info(rli); flush_relay_log_info(rli);
/* /*
Note that Rotate_log_event::exec_event() does not call this function, Note that Rotate_log_event::exec_event() does not call this
so there is no chance that a fake rotate event resets function, so there is no chance that a fake rotate event resets
last_master_timestamp. last_master_timestamp.
*/ */
rli->last_master_timestamp= when; rli->last_master_timestamp= when;
...@@ -667,6 +669,7 @@ Log_event* Log_event::read_log_event(IO_CACHE* file, ...@@ -667,6 +669,7 @@ Log_event* Log_event::read_log_event(IO_CACHE* file,
const Format_description_log_event *description_event) const Format_description_log_event *description_event)
#endif #endif
{ {
DBUG_ENTER("Log_event::read_log_event(IO_CACHE *, Format_description_log_event *");
DBUG_ASSERT(description_event); DBUG_ASSERT(description_event);
char head[LOG_EVENT_MINIMAL_HEADER_LEN]; char head[LOG_EVENT_MINIMAL_HEADER_LEN];
/* /*
...@@ -687,11 +690,11 @@ Log_event* Log_event::read_log_event(IO_CACHE* file, ...@@ -687,11 +690,11 @@ Log_event* Log_event::read_log_event(IO_CACHE* file,
failed my_b_read")); failed my_b_read"));
UNLOCK_MUTEX; UNLOCK_MUTEX;
/* /*
No error here; it could be that we are at the file's end. However if the No error here; it could be that we are at the file's end. However
next my_b_read() fails (below), it will be an error as we were able to if the next my_b_read() fails (below), it will be an error as we
read the first bytes. were able to read the first bytes.
*/ */
return 0; DBUG_RETURN(0);
} }
uint data_len = uint4korr(head + EVENT_LEN_OFFSET); uint data_len = uint4korr(head + EVENT_LEN_OFFSET);
...@@ -733,10 +736,11 @@ failed my_b_read")); ...@@ -733,10 +736,11 @@ failed my_b_read"));
err: err:
UNLOCK_MUTEX; UNLOCK_MUTEX;
if (error) if (!res)
{ {
sql_print_error("\ DBUG_ASSERT(error);
Error in Log_event::read_log_event(): '%s', data_len: %d, event_type: %d", sql_print_error("Error in Log_event::read_log_event(): "
"'%s', data_len: %d, event_type: %d",
error,data_len,head[EVENT_TYPE_OFFSET]); error,data_len,head[EVENT_TYPE_OFFSET]);
my_free(buf, MYF(MY_ALLOW_ZERO_PTR)); my_free(buf, MYF(MY_ALLOW_ZERO_PTR));
/* /*
...@@ -749,7 +753,7 @@ Error in Log_event::read_log_event(): '%s', data_len: %d, event_type: %d", ...@@ -749,7 +753,7 @@ Error in Log_event::read_log_event(): '%s', data_len: %d, event_type: %d",
*/ */
file->error= -1; file->error= -1;
} }
return res; DBUG_RETURN(res);
} }
...@@ -830,14 +834,15 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len, ...@@ -830,14 +834,15 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len,
ev= NULL; ev= NULL;
break; break;
} }
/* /*
is_valid() are small event-specific sanity tests which are important; for is_valid() are small event-specific sanity tests which are
example there are some my_malloc() in constructors important; for example there are some my_malloc() in constructors
(e.g. Query_log_event::Query_log_event(char*...)); when these my_malloc() (e.g. Query_log_event::Query_log_event(char*...)); when these
fail we can't return an error out of the constructor (because constructor my_malloc() fail we can't return an error out of the constructor
is "void") ; so instead we leave the pointer we wanted to allocate (because constructor is "void") ; so instead we leave the pointer we
(e.g. 'query') to 0 and we test it in is_valid(). Same for wanted to allocate (e.g. 'query') to 0 and we test it in is_valid().
Format_description_log_event, member 'post_header_len'. Same for Format_description_log_event, member 'post_header_len'.
*/ */
if (!ev || !ev->is_valid()) if (!ev || !ev->is_valid())
{ {
...@@ -1279,18 +1284,12 @@ void Query_log_event::print(FILE* file, bool short_form, ...@@ -1279,18 +1284,12 @@ void Query_log_event::print(FILE* file, bool short_form,
my_fwrite(file, (byte*) buff, (uint) (end-buff),MYF(MY_NABP | MY_WME)); my_fwrite(file, (byte*) buff, (uint) (end-buff),MYF(MY_NABP | MY_WME));
if (flags & LOG_EVENT_THREAD_SPECIFIC_F) if (flags & LOG_EVENT_THREAD_SPECIFIC_F)
fprintf(file,"SET @@session.pseudo_thread_id=%lu;\n",(ulong)thread_id); fprintf(file,"SET @@session.pseudo_thread_id=%lu;\n",(ulong)thread_id);
/* /*
Now the session variables; If flags2_inited==0, this is an event from 3.23 or 4.0; nothing to
it's more efficient to pass SQL_MODE as a number instead of a print (remember we don't produce mixed relay logs so there cannot be
comma-separated list. 5.0 events before that one so there is nothing to reset).
FOREIGN_KEY_CHECKS, SQL_AUTO_IS_NULL, UNIQUE_CHECKS are session-only
variables (they have no global version; they're not listed in sql_class.h),
The tests below work for pure binlogs or pure relay logs. Won't work for
mixed relay logs but we don't create mixed relay logs (that is, there is no
relay log with a format change except within the 3 first events, which
mysqlbinlog handles gracefully). So this code should always be good.
*/ */
if (likely(flags2_inited)) /* likely as this will mainly read 5.0 logs */ if (likely(flags2_inited)) /* likely as this will mainly read 5.0 logs */
{ {
/* tmp is a bitmask of bits which have changed. */ /* tmp is a bitmask of bits which have changed. */
...@@ -1319,9 +1318,16 @@ void Query_log_event::print(FILE* file, bool short_form, ...@@ -1319,9 +1318,16 @@ void Query_log_event::print(FILE* file, bool short_form,
} }
/* /*
If flags2_inited==0, this is an event from 3.23 or 4.0; nothing to print Now the session variables;
(remember we don't produce mixed relay logs so there cannot be 5.0 events it's more efficient to pass SQL_MODE as a number instead of a
before that one so there is nothing to reset). comma-separated list.
FOREIGN_KEY_CHECKS, SQL_AUTO_IS_NULL, UNIQUE_CHECKS are session-only
variables (they have no global version; they're not listed in
sql_class.h), The tests below work for pure binlogs or pure relay
logs. Won't work for mixed relay logs but we don't create mixed
relay logs (that is, there is no relay log with a format change
except within the 3 first events, which mysqlbinlog handles
gracefully). So this code should always be good.
*/ */
if (likely(sql_mode_inited)) if (likely(sql_mode_inited))
...@@ -1687,15 +1693,19 @@ int Start_log_event_v3::exec_event(struct st_relay_log_info* rli) ...@@ -1687,15 +1693,19 @@ int Start_log_event_v3::exec_event(struct st_relay_log_info* rli)
} }
/* /*
As a transaction NEVER spans on 2 or more binlogs: As a transaction NEVER spans on 2 or more binlogs:
if we have an active transaction at this point, the master died while if we have an active transaction at this point, the master died
writing the transaction to the binary log, i.e. while flushing the binlog while writing the transaction to the binary log, i.e. while
cache to the binlog. As the write was started, the transaction had been flushing the binlog cache to the binlog. As the write was started,
committed on the master, so we lack of information to replay this the transaction had been committed on the master, so we lack of
transaction on the slave; all we can do is stop with error. information to replay this transaction on the slave; all we can do
Note: this event could be sent by the master to inform us of the format is stop with error.
of its binlog; in other words maybe it is not at its original place when Note: this event could be sent by the master to inform us of the
it comes to us; we'll know this by checking log_pos ("artificial" events format of its binlog; in other words maybe it is not at its
have log_pos == 0). original place when it comes to us; we'll know this by checking
log_pos ("artificial" events have log_pos == 0).
TODO test whether it's really necessary, as slave.cc does ROLLBACK
itself
*/ */
if (!artificial_event && (thd->options & OPTION_BEGIN)) if (!artificial_event && (thd->options & OPTION_BEGIN))
{ {
...@@ -1959,7 +1969,7 @@ int Format_description_log_event::exec_event(struct st_relay_log_info* rli) ...@@ -1959,7 +1969,7 @@ int Format_description_log_event::exec_event(struct st_relay_log_info* rli)
/************************************************************************** /**************************************************************************
Load_log_event methods Load_log_event methods
General note about Load_log_event: the binlogging of LOAD DATA INFILE is General note about Load_log_event: the binlogging of LOAD DATA INFILE is
going to be changed in 5.0 (or maybe in 4.1; not decided yet). going to be changed in 5.0 (or maybe in 5.1; not decided yet).
However, the 5.0 slave could still have to read such events (from a 4.x However, the 5.0 slave could still have to read such events (from a 4.x
master), convert them (which just means maybe expand the header, when 5.0 master), convert them (which just means maybe expand the header, when 5.0
servers have a UID in events) (remember that whatever is after the header servers have a UID in events) (remember that whatever is after the header
...@@ -2978,9 +2988,10 @@ int Rand_log_event::exec_event(struct st_relay_log_info* rli) ...@@ -2978,9 +2988,10 @@ int Rand_log_event::exec_event(struct st_relay_log_info* rli)
#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT)
void Xid_log_event::pack_info(Protocol *protocol) void Xid_log_event::pack_info(Protocol *protocol)
{ {
char buf[64], *pos; char buf[128], *pos;
pos= strmov(buf, "xid="); pos= strmov(buf, "COMMIT /* xid=");
pos= longlong10_to_str(xid, pos, 10); pos= longlong10_to_str(xid, pos, 10);
pos= strmov(pos, " */");
protocol->store(buf, (uint) (pos-buf), &my_charset_bin); protocol->store(buf, (uint) (pos-buf), &my_charset_bin);
} }
#endif #endif
...@@ -3021,6 +3032,7 @@ void Xid_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_eve ...@@ -3021,6 +3032,7 @@ void Xid_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_eve
fprintf(file, "\tXid = %s\n", buf); fprintf(file, "\tXid = %s\n", buf);
fflush(file); fflush(file);
} }
fprintf(file, "COMMIT;\n");
} }
#endif /* MYSQL_CLIENT */ #endif /* MYSQL_CLIENT */
...@@ -3029,7 +3041,10 @@ void Xid_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_eve ...@@ -3029,7 +3041,10 @@ void Xid_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_eve
int Xid_log_event::exec_event(struct st_relay_log_info* rli) int Xid_log_event::exec_event(struct st_relay_log_info* rli)
{ {
rli->inc_event_relay_log_pos(); rli->inc_event_relay_log_pos();
return 0; /* For a slave Xid_log_event is COMMIT */
thd->options&= ~(ulong) (OPTION_BEGIN | OPTION_STATUS_NO_TRANS_UPDATE);
thd->server_status&= ~SERVER_STATUS_IN_TRANS;
return ha_commit(thd);
} }
#endif /* !MYSQL_CLIENT */ #endif /* !MYSQL_CLIENT */
......
...@@ -303,15 +303,38 @@ struct sql_ex_info ...@@ -303,15 +303,38 @@ struct sql_ex_info
#endif #endif
/* /*
This flag only makes sense for Format_description_log_event. This flag only makes sense for Format_description_log_event. It is set
It is set not when the event is written, but when a binlog file when the event is written, and *reset* when a binlog file is
is closed. It serves as a reliable indicator that binlog was closed (yes, it's the only case when MySQL modifies already written
closed correctly. (Stop_log_event is not enough, there's always part of binlog). Thus it is a reliable indicator that binlog was
a small chance that mysqld crashes in the middle of insert closed correctly. (Stop_log_event is not enough, there's always a
and end of the binlog would look like a Stop_log_event) small chance that mysqld crashes in the middle of insert and end of
the binlog would look like a Stop_log_event).
This flag is used to detect a restart after a crash,
and to provide "unbreakable" binlog. The problem is that on a crash
storage engines rollback automatically, while binlog does not.
To solve this we use this flag and automatically append ROLLBACK
to every non-closed binlog (append virtually, on reading, file itself
is not changed). If this flag is found, mysqlbinlog simply prints "ROLLBACK".
The replication master does not abort on binlog corruption, but takes it as EOF,
and the replication slave forces a rollback in this case (see below).
Note that old binlogs do not have this flag set, so we get
backward-compatible behaviour.
*/ */
#define LOG_EVENT_BINLOG_CLOSED_F 0x1 #define LOG_EVENT_BINLOG_IN_USE_F 0x1
/*
This flag is only used for a fake Rotate_log_event. When a master, doing a
binlog dump, reaches the end of the binlog and fakes a rotate to make
the slave go to a new file, this flag is used if there was no
"natural" Rotate_log_event.
If this flag is set, the slave will execute ROLLBACK before going further.
*/
#define LOG_EVENT_FORCE_ROLLBACK_F 0x1
/* /*
If the query depends on the thread (for example: TEMPORARY TABLE). If the query depends on the thread (for example: TEMPORARY TABLE).
...@@ -335,21 +358,22 @@ struct sql_ex_info ...@@ -335,21 +358,22 @@ struct sql_ex_info
#define LOG_EVENT_SUPPRESS_USE_F 0x8 #define LOG_EVENT_SUPPRESS_USE_F 0x8
/* /*
OPTIONS_WRITTEN_TO_BIN_LOG are the bits of thd->options which must be written OPTIONS_WRITTEN_TO_BIN_LOG are the bits of thd->options which must be
to the binlog. OPTIONS_WRITTEN_TO_BINLOG could be written into the written to the binlog. OPTIONS_WRITTEN_TO_BINLOG could be written
Format_description_log_event, so that if later we don't want to replicate a into the Format_description_log_event, so that if later we don't want
variable we did replicate, or the contrary, it's doable. But it should not be to replicate a variable we did replicate, or the contrary, it's
too hard to decide once for all of what we replicate and what we don't, among doable. But it should not be too hard to decide once for all of what
the fixed 32 bits of thd->options. we replicate and what we don't, among the fixed 32 bits of
thd->options.
I (Guilhem) have read through every option's usage, and it looks like I (Guilhem) have read through every option's usage, and it looks like
OPTION_AUTO_IS_NULL and OPTION_NO_FOREIGN_KEYS are the only ones which alter OPTION_AUTO_IS_NULL and OPTION_NO_FOREIGN_KEYS are the only ones
how the query modifies the table. It's good to replicate which alter how the query modifies the table. It's good to replicate
OPTION_RELAXED_UNIQUE_CHECKS too because otherwise, the slave may insert data OPTION_RELAXED_UNIQUE_CHECKS too because otherwise, the slave may
slower than the master, in InnoDB. insert data slower than the master, in InnoDB.
OPTION_BIG_SELECTS is not needed (the slave thread runs with OPTION_BIG_SELECTS is not needed (the slave thread runs with
max_join_size=HA_POS_ERROR) and OPTION_BIG_TABLES is not needed either, as max_join_size=HA_POS_ERROR) and OPTION_BIG_TABLES is not needed
the manual says (because a too big in-memory temp table is automatically either, as the manual says (because a too big in-memory temp table is
written to disk). automatically written to disk).
*/ */
#define OPTIONS_WRITTEN_TO_BIN_LOG (OPTION_AUTO_IS_NULL | \ #define OPTIONS_WRITTEN_TO_BIN_LOG (OPTION_AUTO_IS_NULL | \
OPTION_NO_FOREIGN_KEY_CHECKS | OPTION_RELAXED_UNIQUE_CHECKS) OPTION_NO_FOREIGN_KEY_CHECKS | OPTION_RELAXED_UNIQUE_CHECKS)
...@@ -470,14 +494,15 @@ public: ...@@ -470,14 +494,15 @@ public:
ulong data_written; ulong data_written;
/* /*
The master's server id (is preserved in the relay log; used to prevent from The master's server id (is preserved in the relay log; used to
infinite loops in circular replication). prevent from infinite loops in circular replication).
*/ */
uint32 server_id; uint32 server_id;
/* /*
Some 16 flags. Look above for LOG_EVENT_TIME_F, LOG_EVENT_FORCED_ROTATE_F, Some 16 flags. Look above for LOG_EVENT_TIME_F,
LOG_EVENT_THREAD_SPECIFIC_F, and LOG_EVENT_SUPPRESS_USE_F for notes. LOG_EVENT_FORCED_ROTATE_F, LOG_EVENT_THREAD_SPECIFIC_F, and
LOG_EVENT_SUPPRESS_USE_F for notes.
*/ */
uint16 flags; uint16 flags;
......
...@@ -2684,7 +2684,7 @@ server."); ...@@ -2684,7 +2684,7 @@ server.");
ln= mysql_bin_log.generate_name(opt_bin_logname, "-bin", 1, buf); ln= mysql_bin_log.generate_name(opt_bin_logname, "-bin", 1, buf);
if (ln == buf) if (ln == buf)
{ {
my_free(opt_bin_logname, MYF(0)); my_free(opt_bin_logname, MYF(MY_ALLOW_ZERO_PTR));
opt_bin_logname=my_strdup(buf, MYF(0)); opt_bin_logname=my_strdup(buf, MYF(0));
} }
mysql_bin_log.open_index_file(opt_binlog_index_name, ln); mysql_bin_log.open_index_file(opt_binlog_index_name, ln);
......
...@@ -670,11 +670,11 @@ int terminate_slave_thread(THD* thd, pthread_mutex_t* term_lock, ...@@ -670,11 +670,11 @@ int terminate_slave_thread(THD* thd, pthread_mutex_t* term_lock,
} }
} }
DBUG_ASSERT(thd != 0); DBUG_ASSERT(thd != 0);
THD_CHECK_SENTRY(thd);
/* /*
Is is criticate to test if the slave is running. Otherwise, we might Is is critical to test if the slave is running. Otherwise, we might
be referening freed memory trying to kick it be referening freed memory trying to kick it
*/ */
THD_CHECK_SENTRY(thd);
while (*slave_running) // Should always be true while (*slave_running) // Should always be true
{ {
...@@ -2935,8 +2935,7 @@ static ulong read_event(MYSQL* mysql, MASTER_INFO *mi, bool* suppress_warnings) ...@@ -2935,8 +2935,7 @@ static ulong read_event(MYSQL* mysql, MASTER_INFO *mi, bool* suppress_warnings)
*suppress_warnings= TRUE; *suppress_warnings= TRUE;
} }
else else
sql_print_error("Error reading packet from server: %s (\ sql_print_error("Error reading packet from server: %s ( server_errno=%d)",
server_errno=%d)",
mysql_error(mysql), mysql_errno(mysql)); mysql_error(mysql), mysql_errno(mysql));
return packet_error; return packet_error;
} }
...@@ -3167,7 +3166,21 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli) ...@@ -3167,7 +3166,21 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli)
thd->set_time(); // time the query thd->set_time(); // time the query
thd->lex->current_select= 0; thd->lex->current_select= 0;
if (!ev->when) if (!ev->when)
{
ev->when = time(NULL); ev->when = time(NULL);
/*
fake Rotate: it means that normal execution flow of statements is
interrupted. Let's fake ROLLBACK to undo any half-executed transaction
*/
if (ev->get_type_code() == ROTATE_EVENT &&
ev->flags & LOG_EVENT_FORCE_ROLLBACK_F)
{
ha_rollback_stmt(thd);
ha_rollback(thd);
thd->options&= ~(ulong) (OPTION_BEGIN | OPTION_STATUS_NO_TRANS_UPDATE);
thd->server_status&= ~SERVER_STATUS_IN_TRANS;
}
}
ev->thd = thd; ev->thd = thd;
exec_res = ev->exec_event(rli); exec_res = ev->exec_event(rli);
DBUG_ASSERT(rli->sql_thd==thd); DBUG_ASSERT(rli->sql_thd==thd);
...@@ -3260,7 +3273,6 @@ slave_begin: ...@@ -3260,7 +3273,6 @@ slave_begin:
goto err; goto err;
} }
thd->proc_info = "Connecting to master"; thd->proc_info = "Connecting to master";
// we can get killed during safe_connect // we can get killed during safe_connect
if (!safe_connect(thd, mysql, mi)) if (!safe_connect(thd, mysql, mi))
...@@ -3354,9 +3366,9 @@ after reconnect"); ...@@ -3354,9 +3366,9 @@ after reconnect");
bool suppress_warnings= 0; bool suppress_warnings= 0;
/* /*
We say "waiting" because read_event() will wait if there's nothing to We say "waiting" because read_event() will wait if there's nothing to
read. But if there's something to read, it will not wait. The important read. But if there's something to read, it will not wait. The
thing is to not confuse users by saying "reading" whereas we're in fact important thing is to not confuse users by saying "reading" whereas
receiving nothing. we're in fact receiving nothing.
*/ */
thd->proc_info = "Waiting for master to send event"; thd->proc_info = "Waiting for master to send event";
ulong event_len = read_event(mysql, mi, &suppress_warnings); ulong event_len = read_event(mysql, mi, &suppress_warnings);
...@@ -3870,6 +3882,7 @@ static int process_io_rotate(MASTER_INFO *mi, Rotate_log_event *rev) ...@@ -3870,6 +3882,7 @@ static int process_io_rotate(MASTER_INFO *mi, Rotate_log_event *rev)
if (disconnect_slave_event_count) if (disconnect_slave_event_count)
events_till_disconnect++; events_till_disconnect++;
#endif #endif
/* /*
If description_event_for_queue is format <4, there is conversion in the If description_event_for_queue is format <4, there is conversion in the
relay log to the slave's format (4). And Rotate can mean upgrade or relay log to the slave's format (4). And Rotate can mean upgrade or
...@@ -3893,8 +3906,8 @@ static int process_io_rotate(MASTER_INFO *mi, Rotate_log_event *rev) ...@@ -3893,8 +3906,8 @@ static int process_io_rotate(MASTER_INFO *mi, Rotate_log_event *rev)
} }
/* /*
Reads a 3.23 event and converts it to the slave's format. This code was copied Reads a 3.23 event and converts it to the slave's format. This code was
from MySQL 4.0. copied from MySQL 4.0.
*/ */
static int queue_binlog_ver_1_event(MASTER_INFO *mi, const char *buf, static int queue_binlog_ver_1_event(MASTER_INFO *mi, const char *buf,
ulong event_len) ulong event_len)
...@@ -4157,9 +4170,9 @@ int queue_event(MASTER_INFO* mi,const char* buf, ulong event_len) ...@@ -4157,9 +4170,9 @@ int queue_event(MASTER_INFO* mi,const char* buf, ulong event_len)
to write this event again). to write this event again).
*/ */
/* /*
We are the only thread which reads/writes description_event_for_queue. The We are the only thread which reads/writes description_event_for_queue.
relay_log struct does not move (though some members of it can change), so The relay_log struct does not move (though some members of it can
we needn't any lock (no rli->data_lock, no log lock). change), so we needn't any lock (no rli->data_lock, no log lock).
*/ */
Format_description_log_event* tmp; Format_description_log_event* tmp;
const char* errmsg; const char* errmsg;
......
...@@ -301,7 +301,7 @@ public: ...@@ -301,7 +301,7 @@ public:
bool write(THD *thd, const char *query, uint query_length, bool write(THD *thd, const char *query, uint query_length,
time_t query_start=0); time_t query_start=0);
bool write(Log_event* event_info); // binary log write bool write(Log_event* event_info); // binary log write
bool write(THD *thd, IO_CACHE *cache, bool commit_or_rollback); bool write(THD *thd, IO_CACHE *cache);
/* /*
v stands for vector v stands for vector
...@@ -314,11 +314,11 @@ public: ...@@ -314,11 +314,11 @@ public:
void make_log_name(char* buf, const char* log_ident); void make_log_name(char* buf, const char* log_ident);
bool is_active(const char* log_file_name); bool is_active(const char* log_file_name);
int update_log_index(LOG_INFO* linfo, bool need_update_threads); int update_log_index(LOG_INFO* linfo, bool need_update_threads);
int purge_logs(const char *to_log, bool included, int purge_logs(const char *to_log, bool included,
bool need_mutex, bool need_update_threads, bool need_mutex, bool need_update_threads,
ulonglong *decrease_log_space); ulonglong *decrease_log_space);
int purge_logs_before_date(time_t purge_time); int purge_logs_before_date(time_t purge_time);
int purge_first_log(struct st_relay_log_info* rli, bool included); int purge_first_log(struct st_relay_log_info* rli, bool included);
bool reset_logs(THD* thd); bool reset_logs(THD* thd);
void close(uint exiting); void close(uint exiting);
......
...@@ -31,24 +31,29 @@ static int binlog_dump_count = 0; ...@@ -31,24 +31,29 @@ static int binlog_dump_count = 0;
binlog) Rotate event, which contains the name of the binlog we are going to binlog) Rotate event, which contains the name of the binlog we are going to
send to the slave (because the slave may not know it if it just asked for send to the slave (because the slave may not know it if it just asked for
MASTER_LOG_FILE='', MASTER_LOG_POS=4). MASTER_LOG_FILE='', MASTER_LOG_POS=4).
< 4.0.14, fake_rotate_event() was called only if the requested pos was < 4.0.14, fake_rotate_event() was called only if the requested pos was 4.
4. After this version we always call it, so that a 3.23.58 slave can rely on After this version we always call it, so that a 3.23.58 slave can rely on
it to detect if the master is 4.0 (and stop) (the _fake_ Rotate event has it to detect if the master is 4.0 (and stop) (the _fake_ Rotate event has
zeros in the good positions which, by chance, make it possible for the 3.23 zeros in the good positions which, by chance, make it possible for the 3.23
slave to detect that this event is unexpected) (this is luck which happens slave to detect that this event is unexpected) (this is luck which happens
because the master and slave disagree on the size of the header of because the master and slave disagree on the size of the header of
Log_event). Log_event).
Relying on the event length of the Rotate event instead of these well-placed Relying on the event length of the Rotate event instead of these
zeros was not possible as Rotate events have a variable-length part. well-placed zeros was not possible as Rotate events have a variable-length
part.
*/ */
static int fake_rotate_event(NET* net, String* packet, char* log_file_name, static int fake_rotate_event(NET* net, String* packet, char* log_file_name,
ulonglong position, const char** errmsg) ulonglong position, int flags, const char** errmsg)
{ {
DBUG_ENTER("fake_rotate_event"); DBUG_ENTER("fake_rotate_event");
char header[LOG_EVENT_HEADER_LEN], buf[ROTATE_HEADER_LEN]; char header[LOG_EVENT_HEADER_LEN], buf[ROTATE_HEADER_LEN+100];
memset(header, 0, 4); // 'when' (the timestamp) does not matter, is set to 0 /*
'when' (the timestamp) is set to 0 so that slave could distinguish between
real and fake Rotate events (if necessary)
*/
memset(header, 0, 4);
header[EVENT_TYPE_OFFSET] = ROTATE_EVENT; header[EVENT_TYPE_OFFSET] = ROTATE_EVENT;
char* p = log_file_name+dirname_length(log_file_name); char* p = log_file_name+dirname_length(log_file_name);
...@@ -56,11 +61,11 @@ static int fake_rotate_event(NET* net, String* packet, char* log_file_name, ...@@ -56,11 +61,11 @@ static int fake_rotate_event(NET* net, String* packet, char* log_file_name,
ulong event_len = ident_len + LOG_EVENT_HEADER_LEN + ROTATE_HEADER_LEN; ulong event_len = ident_len + LOG_EVENT_HEADER_LEN + ROTATE_HEADER_LEN;
int4store(header + SERVER_ID_OFFSET, server_id); int4store(header + SERVER_ID_OFFSET, server_id);
int4store(header + EVENT_LEN_OFFSET, event_len); int4store(header + EVENT_LEN_OFFSET, event_len);
int2store(header + FLAGS_OFFSET, 0); int2store(header + FLAGS_OFFSET, flags);
// TODO: check what problems this may cause and fix them // TODO: check what problems this may cause and fix them
int4store(header + LOG_POS_OFFSET, 0); int4store(header + LOG_POS_OFFSET, 0);
packet->append(header, sizeof(header)); packet->append(header, sizeof(header));
int8store(buf+R_POS_OFFSET,position); int8store(buf+R_POS_OFFSET,position);
packet->append(buf, ROTATE_HEADER_LEN); packet->append(buf, ROTATE_HEADER_LEN);
...@@ -276,7 +281,7 @@ bool purge_master_logs_before_date(THD* thd, time_t purge_time) ...@@ -276,7 +281,7 @@ bool purge_master_logs_before_date(THD* thd, time_t purge_time)
int test_for_non_eof_log_read_errors(int error, const char **errmsg) int test_for_non_eof_log_read_errors(int error, const char **errmsg)
{ {
if (error == LOG_READ_EOF) if (error == LOG_READ_EOF)
return 0; return 0;
my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG; my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG;
switch (error) { switch (error) {
...@@ -321,6 +326,7 @@ void mysql_binlog_send(THD* thd, char* log_ident, my_off_t pos, ...@@ -321,6 +326,7 @@ void mysql_binlog_send(THD* thd, char* log_ident, my_off_t pos,
const char *errmsg = "Unknown error"; const char *errmsg = "Unknown error";
NET* net = &thd->net; NET* net = &thd->net;
pthread_mutex_t *log_lock; pthread_mutex_t *log_lock;
bool binlog_can_be_corrupted= FALSE, rotate_was_found=FALSE;
#ifndef DBUG_OFF #ifndef DBUG_OFF
int left_events = max_binlog_dump_events; int left_events = max_binlog_dump_events;
#endif #endif
...@@ -388,37 +394,38 @@ impossible position"; ...@@ -388,37 +394,38 @@ impossible position";
/* /*
Tell the client about the log name with a fake Rotate event; Tell the client about the log name with a fake Rotate event;
this is needed even if we also send a Format_description_log_event just this is needed even if we also send a Format_description_log_event
after, because that event does not contain the binlog's name. just after, because that event does not contain the binlog's name.
Note that as this Rotate event is sent before Format_description_log_event, Note that as this Rotate event is sent before
the slave cannot have any info to understand this event's format, so the Format_description_log_event, the slave cannot have any info to
header len of Rotate_log_event is FROZEN understand this event's format, so the header len of
(so in 5.0 it will have a header shorter than other events except Rotate_log_event is FROZEN (so in 5.0 it will have a header shorter
FORMAT_DESCRIPTION_EVENT). than other events except FORMAT_DESCRIPTION_EVENT).
Before 4.0.14 we called fake_rotate_event below only if Before 4.0.14 we called fake_rotate_event below only if (pos ==
(pos == BIN_LOG_HEADER_SIZE), because if this is false then the slave BIN_LOG_HEADER_SIZE), because if this is false then the slave
already knows the binlog's name. already knows the binlog's name.
Since, we always call fake_rotate_event; if the slave already knew the log's Since, we always call fake_rotate_event; if the slave already knew
name (ex: CHANGE MASTER TO MASTER_LOG_FILE=...) this is useless but does the log's name (ex: CHANGE MASTER TO MASTER_LOG_FILE=...) this is
not harm much. It is nice for 3.23 (>=.58) slaves which test Rotate events useless but does not harm much. It is nice for 3.23 (>=.58) slaves
to see if the master is 4.0 (then they choose to stop because they can't which test Rotate events to see if the master is 4.0 (then they
replicate 4.0); by always calling fake_rotate_event we are sure that choose to stop because they can't replicate 4.0); by always calling
3.23.58 and newer will detect the problem as soon as replication starts fake_rotate_event we are sure that 3.23.58 and newer will detect the
(BUG#198). problem as soon as replication starts (BUG#198).
Always calling fake_rotate_event makes sending of normal Always calling fake_rotate_event makes sending of normal
(=from-binlog) Rotate events a priori unneeded, but it is not so simple: (=from-binlog) Rotate events a priori unneeded, but it is not so
the 2 Rotate events are not equivalent, the normal one is before the Stop simple: the 2 Rotate events are not equivalent, the normal one is
event, the fake one is after. If we don't send the normal one, then the before the Stop event, the fake one is after. If we don't send the
Stop event will be interpreted (by existing 4.0 slaves) as "the master normal one, then the Stop event will be interpreted (by existing 4.0
stopped", which is wrong. So for safety, given that we want minimum slaves) as "the master stopped", which is wrong. So for safety,
modification of 4.0, we send the normal and fake Rotates. given that we want minimum modification of 4.0, we send the normal
and fake Rotates.
*/ */
if (fake_rotate_event(net, packet, log_file_name, pos, &errmsg)) if (fake_rotate_event(net, packet, log_file_name, pos, 0, &errmsg))
{ {
/* /*
This error code is not perfect, as fake_rotate_event() does not read This error code is not perfect, as fake_rotate_event() does not
anything from the binlog; if it fails it's because of an error in read anything from the binlog; if it fails it's because of an
my_net_write(), fortunately it will say it in errmsg. error in my_net_write(), fortunately it will say so in errmsg.
*/ */
my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG; my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG;
goto err; goto err;
...@@ -426,30 +433,35 @@ impossible position"; ...@@ -426,30 +433,35 @@ impossible position";
packet->set("\0", 1, &my_charset_bin); packet->set("\0", 1, &my_charset_bin);
/* /*
We can set log_lock now, it does not move (it's a member of mysql_bin_log, We can set log_lock now, it does not move (it's a member of
and it's already inited, and it will be destroyed only at shutdown). mysql_bin_log, and it's already inited, and it will be destroyed
only at shutdown).
*/ */
log_lock = mysql_bin_log.get_log_lock(); log_lock = mysql_bin_log.get_log_lock();
if (pos > BIN_LOG_HEADER_SIZE) if (pos > BIN_LOG_HEADER_SIZE)
{ {
/* Try to find a Format_description_log_event at the beginning of the binlog */ /*
Try to find a Format_description_log_event at the beginning of
the binlog
*/
if (!(error = Log_event::read_log_event(&log, packet, log_lock))) if (!(error = Log_event::read_log_event(&log, packet, log_lock)))
{ {
/* /*
The packet has offsets equal to the normal offsets in a binlog event The packet has offsets equal to the normal offsets in a binlog
+1 (the first character is \0). event +1 (the first character is \0).
*/ */
DBUG_PRINT("info", DBUG_PRINT("info",
("Looked for a Format_description_log_event, found event type %d", ("Looked for a Format_description_log_event, found event type %d",
(*packet)[EVENT_TYPE_OFFSET+1])); (*packet)[EVENT_TYPE_OFFSET+1]));
if ((*packet)[EVENT_TYPE_OFFSET+1] == FORMAT_DESCRIPTION_EVENT) if ((*packet)[EVENT_TYPE_OFFSET+1] == FORMAT_DESCRIPTION_EVENT)
{ {
binlog_can_be_corrupted= (*packet)[FLAGS_OFFSET+1] & LOG_EVENT_BINLOG_IN_USE_F;
/* /*
mark that this event with "log_pos=0", so the slave mark that this event with "log_pos=0", so the slave
should not increment master's binlog position should not increment master's binlog position
(rli->group_master_log_pos) (rli->group_master_log_pos)
*/ */
int4store(packet->c_ptr() +LOG_POS_OFFSET+1,0); int4store(packet->c_ptr()+LOG_POS_OFFSET+1, 0);
/* send it */ /* send it */
if (my_net_write(net, (char*)packet->ptr(), packet->length())) if (my_net_write(net, (char*)packet->ptr(), packet->length()))
{ {
...@@ -458,24 +470,25 @@ impossible position"; ...@@ -458,24 +470,25 @@ impossible position";
goto err; goto err;
} }
/* /*
No need to save this event. We are only doing simple reads (no real No need to save this event. We are only doing simple reads
parsing of the events) so we don't need it. And so we don't need the (no real parsing of the events) so we don't need it. And so
artificial Format_description_log_event of 3.23&4.x. we don't need the artificial Format_description_log_event of
3.23&4.x.
*/ */
} }
} }
else else
if (test_for_non_eof_log_read_errors(error, &errmsg)) if (test_for_non_eof_log_read_errors(error, &errmsg))
goto err; goto err;
/* /*
else: it's EOF, nothing to do, go on reading next events, the else: it's EOF, nothing to do, go on reading next events, the
Format_description_log_event will be found naturally if it is written. Format_description_log_event will be found naturally if it is written.
*/ */
/* reset the packet as we wrote to it in any case */ /* reset the packet as we wrote to it in any case */
packet->set("\0", 1, &my_charset_bin); packet->set("\0", 1, &my_charset_bin);
} /* end of if (pos > BIN_LOG_HEADER_SIZE); if false, the Format_description_log_event } /* end of if (pos > BIN_LOG_HEADER_SIZE); if false, the
event will be found naturally. */ Format_description_log_event event will be found naturally. */
/* seek to the requested position, to start the requested dump */ /* seek to the requested position, to start the requested dump */
my_b_seek(&log, pos); // Seek will done on next read my_b_seek(&log, pos); // Seek will done on next read
...@@ -492,6 +505,14 @@ impossible position"; ...@@ -492,6 +505,14 @@ impossible position";
goto err; goto err;
} }
#endif #endif
if ((*packet)[EVENT_TYPE_OFFSET+1] == FORMAT_DESCRIPTION_EVENT)
binlog_can_be_corrupted= (*packet)[FLAGS_OFFSET+1] & LOG_EVENT_BINLOG_IN_USE_F;
else if ((*packet)[EVENT_TYPE_OFFSET+1] == STOP_EVENT)
binlog_can_be_corrupted= FALSE;
else if ((*packet)[EVENT_TYPE_OFFSET+1] == ROTATE_EVENT)
rotate_was_found=TRUE;
if (my_net_write(net, (char*)packet->ptr(), packet->length())) if (my_net_write(net, (char*)packet->ptr(), packet->length()))
{ {
errmsg = "Failed on my_net_write()"; errmsg = "Failed on my_net_write()";
...@@ -511,19 +532,25 @@ impossible position"; ...@@ -511,19 +532,25 @@ impossible position";
} }
packet->set("\0", 1, &my_charset_bin); packet->set("\0", 1, &my_charset_bin);
} }
/*
Here we were reading a binlog that was not closed properly (as a result
of a crash?). Treat any corruption as EOF.
*/
if (binlog_can_be_corrupted && error != LOG_READ_MEM)
error=LOG_READ_EOF;
/* /*
TODO: now that we are logging the offset, check to make sure TODO: now that we are logging the offset, check to make sure
the recorded offset and the actual match. the recorded offset and the actual match.
Guilhem 2003-06: this is not true if this master is a slave <4.0.15 Guilhem 2003-06: this is not true if this master is a slave
running with --log-slave-updates, because then log_pos may be the offset <4.0.15 running with --log-slave-updates, because then log_pos may
in the-master-of-this-master's binlog. be the offset in the-master-of-this-master's binlog.
*/ */
if (test_for_non_eof_log_read_errors(error, &errmsg)) if (test_for_non_eof_log_read_errors(error, &errmsg))
goto err; goto err;
if (!(flags & BINLOG_DUMP_NON_BLOCK) && if (!(flags & BINLOG_DUMP_NON_BLOCK) &&
mysql_bin_log.is_active(log_file_name)) mysql_bin_log.is_active(log_file_name))
{ {
/* /*
Block until there is more data in the log Block until there is more data in the log
...@@ -559,9 +586,9 @@ impossible position"; ...@@ -559,9 +586,9 @@ impossible position";
now, but we'll be quick and just read one record now, but we'll be quick and just read one record
TODO: TODO:
Add a counter that is incremented for each time we update Add a counter that is incremented for each time we update the
the binary log. We can avoid the following read if the counter binary log. We can avoid the following read if the counter
has not been updated since last read. has not been updated since last read.
*/ */
pthread_mutex_lock(log_lock); pthread_mutex_lock(log_lock);
...@@ -654,20 +681,23 @@ impossible position"; ...@@ -654,20 +681,23 @@ impossible position";
(void) my_close(file, MYF(MY_WME)); (void) my_close(file, MYF(MY_WME));
/* /*
Call fake_rotate_event() in case the previous log (the one which we have Call fake_rotate_event() in case the previous log (the one which
just finished reading) did not contain a Rotate event (for example (I we have just finished reading) did not contain a Rotate event
don't know any other example) the previous log was the last one before (for example (I don't know any other example) the previous log
the master was shutdown & restarted). was the last one before the master was shutdown & restarted).
This way we tell the slave about the new log's name and position. This way we tell the slave about the new log's name and
If the binlog is 5.0, the next event we are going to read and send is position. If the binlog is 5.0, the next event we are going to
Format_description_log_event. read and send is Format_description_log_event.
*/ */
if ((file=open_binlog(&log, log_file_name, &errmsg)) < 0 || if ((file=open_binlog(&log, log_file_name, &errmsg)) < 0 ||
fake_rotate_event(net, packet, log_file_name, BIN_LOG_HEADER_SIZE, &errmsg)) fake_rotate_event(net, packet, log_file_name, BIN_LOG_HEADER_SIZE,
rotate_was_found ? 0 : LOG_EVENT_FORCE_ROLLBACK_F,
&errmsg))
{ {
my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG; my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG;
goto err; goto err;
} }
rotate_was_found=FALSE;
packet->length(0); packet->length(0);
packet->append('\0'); packet->append('\0');
} }
...@@ -708,17 +738,17 @@ int start_slave(THD* thd , MASTER_INFO* mi, bool net_report) ...@@ -708,17 +738,17 @@ int start_slave(THD* thd , MASTER_INFO* mi, bool net_report)
int slave_errno= 0; int slave_errno= 0;
int thread_mask; int thread_mask;
DBUG_ENTER("start_slave"); DBUG_ENTER("start_slave");
if (check_access(thd, SUPER_ACL, any_db,0,0,0)) if (check_access(thd, SUPER_ACL, any_db,0,0,0))
DBUG_RETURN(1); DBUG_RETURN(1);
lock_slave_threads(mi); // this allows us to cleanly read slave_running lock_slave_threads(mi); // this allows us to cleanly read slave_running
// Get a mask of _stopped_ threads // Get a mask of _stopped_ threads
init_thread_mask(&thread_mask,mi,1 /* inverse */); init_thread_mask(&thread_mask,mi,1 /* inverse */);
/* /*
Below we will start all stopped threads. Below we will start all stopped threads. But if the user wants to
But if the user wants to start only one thread, do as if the other thread start only one thread, do as if the other thread was running (as we
was running (as we don't want to touch the other thread), so set the don't want to touch the other thread), so set the bit to 0 for the
bit to 0 for the other thread other thread
*/ */
if (thd->lex->slave_thd_opt) if (thd->lex->slave_thd_opt)
thread_mask&= thd->lex->slave_thd_opt; thread_mask&= thd->lex->slave_thd_opt;
...@@ -729,9 +759,9 @@ int start_slave(THD* thd , MASTER_INFO* mi, bool net_report) ...@@ -729,9 +759,9 @@ int start_slave(THD* thd , MASTER_INFO* mi, bool net_report)
slave_errno=ER_MASTER_INFO; slave_errno=ER_MASTER_INFO;
else if (server_id_supplied && *mi->host) else if (server_id_supplied && *mi->host)
{ {
/* /*
If we will start SQL thread we will care about UNTIL options. If we will start SQL thread we will care about UNTIL options. If
If not, and they are specified, we will ignore them and warn user not, and they are specified, we will ignore them and warn user
about this fact. about this fact.
*/ */
if (thread_mask & SLAVE_SQL) if (thread_mask & SLAVE_SQL)
...@@ -742,13 +772,13 @@ int start_slave(THD* thd , MASTER_INFO* mi, bool net_report) ...@@ -742,13 +772,13 @@ int start_slave(THD* thd , MASTER_INFO* mi, bool net_report)
{ {
mi->rli.until_condition= RELAY_LOG_INFO::UNTIL_MASTER_POS; mi->rli.until_condition= RELAY_LOG_INFO::UNTIL_MASTER_POS;
mi->rli.until_log_pos= thd->lex->mi.pos; mi->rli.until_log_pos= thd->lex->mi.pos;
/* /*
We don't check thd->lex->mi.log_file_name for NULL here We don't check thd->lex->mi.log_file_name for NULL here
since it is checked in sql_yacc.yy since it is checked in sql_yacc.yy
*/ */
strmake(mi->rli.until_log_name, thd->lex->mi.log_file_name, strmake(mi->rli.until_log_name, thd->lex->mi.log_file_name,
sizeof(mi->rli.until_log_name)-1); sizeof(mi->rli.until_log_name)-1);
} }
else if (thd->lex->mi.relay_log_pos) else if (thd->lex->mi.relay_log_pos)
{ {
mi->rli.until_condition= RELAY_LOG_INFO::UNTIL_RELAY_POS; mi->rli.until_condition= RELAY_LOG_INFO::UNTIL_RELAY_POS;
...@@ -772,15 +802,15 @@ int start_slave(THD* thd , MASTER_INFO* mi, bool net_report) ...@@ -772,15 +802,15 @@ int start_slave(THD* thd , MASTER_INFO* mi, bool net_report)
p_end points to the first invalid character. If it equals p_end points to the first invalid character. If it equals
to p, no digits were found, error. If it contains '\0' it to p, no digits were found, error. If it contains '\0' it
means conversion went ok. means conversion went ok.
*/ */
if (p_end==p || *p_end) if (p_end==p || *p_end)
slave_errno=ER_BAD_SLAVE_UNTIL_COND; slave_errno=ER_BAD_SLAVE_UNTIL_COND;
} }
else else
slave_errno=ER_BAD_SLAVE_UNTIL_COND; slave_errno=ER_BAD_SLAVE_UNTIL_COND;
/* mark the cached result of the UNTIL comparison as "undefined" */ /* mark the cached result of the UNTIL comparison as "undefined" */
mi->rli.until_log_names_cmp_result= mi->rli.until_log_names_cmp_result=
RELAY_LOG_INFO::UNTIL_LOG_NAMES_CMP_UNKNOWN; RELAY_LOG_INFO::UNTIL_LOG_NAMES_CMP_UNKNOWN;
/* Issuing warning when started without --skip-slave-start */ /* Issuing warning when started without --skip-slave-start */
...@@ -788,14 +818,13 @@ int start_slave(THD* thd , MASTER_INFO* mi, bool net_report) ...@@ -788,14 +818,13 @@ int start_slave(THD* thd , MASTER_INFO* mi, bool net_report)
push_warning(thd, MYSQL_ERROR::WARN_LEVEL_NOTE, ER_MISSING_SKIP_SLAVE, push_warning(thd, MYSQL_ERROR::WARN_LEVEL_NOTE, ER_MISSING_SKIP_SLAVE,
ER(ER_MISSING_SKIP_SLAVE)); ER(ER_MISSING_SKIP_SLAVE));
} }
pthread_mutex_unlock(&mi->rli.data_lock); pthread_mutex_unlock(&mi->rli.data_lock);
} }
else if (thd->lex->mi.pos || thd->lex->mi.relay_log_pos) else if (thd->lex->mi.pos || thd->lex->mi.relay_log_pos)
push_warning(thd, MYSQL_ERROR::WARN_LEVEL_NOTE, ER_UNTIL_COND_IGNORED, push_warning(thd, MYSQL_ERROR::WARN_LEVEL_NOTE, ER_UNTIL_COND_IGNORED,
ER(ER_UNTIL_COND_IGNORED)); ER(ER_UNTIL_COND_IGNORED));
if (!slave_errno) if (!slave_errno)
slave_errno = start_slave_threads(0 /*no mutex */, slave_errno = start_slave_threads(0 /*no mutex */,
1 /* wait for start */, 1 /* wait for start */,
...@@ -810,9 +839,9 @@ int start_slave(THD* thd , MASTER_INFO* mi, bool net_report) ...@@ -810,9 +839,9 @@ int start_slave(THD* thd , MASTER_INFO* mi, bool net_report)
//no error if all threads are already started, only a warning //no error if all threads are already started, only a warning
push_warning(thd, MYSQL_ERROR::WARN_LEVEL_NOTE, ER_SLAVE_WAS_RUNNING, push_warning(thd, MYSQL_ERROR::WARN_LEVEL_NOTE, ER_SLAVE_WAS_RUNNING,
ER(ER_SLAVE_WAS_RUNNING)); ER(ER_SLAVE_WAS_RUNNING));
unlock_slave_threads(mi); unlock_slave_threads(mi);
if (slave_errno) if (slave_errno)
{ {
if (net_report) if (net_report)
...@@ -912,7 +941,7 @@ int reset_slave(THD *thd, MASTER_INFO* mi) ...@@ -912,7 +941,7 @@ int reset_slave(THD *thd, MASTER_INFO* mi)
1 /* just reset */, 1 /* just reset */,
&errmsg))) &errmsg)))
goto err; goto err;
/* /*
Clear master's log coordinates and reset host/user/etc to the values Clear master's log coordinates and reset host/user/etc to the values
specified in mysqld's options (only for good display of SHOW SLAVE STATUS; specified in mysqld's options (only for good display of SHOW SLAVE STATUS;
...@@ -921,13 +950,13 @@ int reset_slave(THD *thd, MASTER_INFO* mi) ...@@ -921,13 +950,13 @@ int reset_slave(THD *thd, MASTER_INFO* mi)
STATUS; before doing START SLAVE; STATUS; before doing START SLAVE;
*/ */
init_master_info_with_options(mi); init_master_info_with_options(mi);
/* /*
Reset errors, and master timestamp (the idea is that we forget about the Reset errors, and master timestamp (the idea is that we forget about the
old master). old master).
*/ */
clear_slave_error_timestamp(&mi->rli); clear_slave_error_timestamp(&mi->rli);
clear_until_condition(&mi->rli); clear_until_condition(&mi->rli);
// close master_info_file, relay_log_info_file, set mi->inited=rli->inited=0 // close master_info_file, relay_log_info_file, set mi->inited=rli->inited=0
end_master_info(mi); end_master_info(mi);
// and delete these two files // and delete these two files
...@@ -1243,7 +1272,7 @@ bool mysql_show_binlog_events(THD* thd) ...@@ -1243,7 +1272,7 @@ bool mysql_show_binlog_events(THD* thd)
IO_CACHE log; IO_CACHE log;
File file = -1; File file = -1;
Format_description_log_event *description_event= new Format_description_log_event *description_event= new
Format_description_log_event(3); /* MySQL 4.0 by default */ Format_description_log_event(3); /* MySQL 4.0 by default */
Log_event::init_show_field_list(&field_list); Log_event::init_show_field_list(&field_list);
if (protocol->send_fields(&field_list, if (protocol->send_fields(&field_list,
...@@ -1260,7 +1289,7 @@ bool mysql_show_binlog_events(THD* thd) ...@@ -1260,7 +1289,7 @@ bool mysql_show_binlog_events(THD* thd)
pthread_mutex_t *log_lock = mysql_bin_log.get_log_lock(); pthread_mutex_t *log_lock = mysql_bin_log.get_log_lock();
LOG_INFO linfo; LOG_INFO linfo;
Log_event* ev; Log_event* ev;
limit_start= thd->lex->current_select->offset_limit; limit_start= thd->lex->current_select->offset_limit;
limit_end= thd->lex->current_select->select_limit + limit_start; limit_end= thd->lex->current_select->select_limit + limit_start;
...@@ -1284,15 +1313,15 @@ bool mysql_show_binlog_events(THD* thd) ...@@ -1284,15 +1313,15 @@ bool mysql_show_binlog_events(THD* thd)
pthread_mutex_lock(log_lock); pthread_mutex_lock(log_lock);
/* /*
open_binlog() sought to position 4. open_binlog() sought to position 4.
Read the first event in case it's a Format_description_log_event, to know the Read the first event in case it's a Format_description_log_event, to
format. If there's no such event, we are 3.23 or 4.x. This code, like know the format. If there's no such event, we are 3.23 or 4.x. This
before, can't read 3.23 binlogs. code, like before, can't read 3.23 binlogs.
This code will fail on a mixed relay log (one which has Format_desc then This code will fail on a mixed relay log (one which has Format_desc then
Rotate then Format_desc). Rotate then Format_desc).
*/ */
ev = Log_event::read_log_event(&log,(pthread_mutex_t*)0,description_event); ev = Log_event::read_log_event(&log,(pthread_mutex_t*)0,description_event);
if (ev) if (ev)
{ {
...@@ -1312,7 +1341,7 @@ bool mysql_show_binlog_events(THD* thd) ...@@ -1312,7 +1341,7 @@ bool mysql_show_binlog_events(THD* thd)
errmsg="Invalid Format_description event; could be out of memory"; errmsg="Invalid Format_description event; could be out of memory";
goto err; goto err;
} }
for (event_count = 0; for (event_count = 0;
(ev = Log_event::read_log_event(&log,(pthread_mutex_t*)0,description_event)); ) (ev = Log_event::read_log_event(&log,(pthread_mutex_t*)0,description_event)); )
{ {
......
...@@ -36,7 +36,11 @@ extern I_List<i_string> binlog_do_db, binlog_ignore_db; ...@@ -36,7 +36,11 @@ extern I_List<i_string> binlog_do_db, binlog_ignore_db;
extern int max_binlog_dump_events; extern int max_binlog_dump_events;
extern my_bool opt_sporadic_binlog_dump_fail; extern my_bool opt_sporadic_binlog_dump_fail;
#define KICK_SLAVE(thd) { pthread_mutex_lock(&(thd)->LOCK_delete); (thd)->awake(THD::NOT_KILLED); pthread_mutex_unlock(&(thd)->LOCK_delete); } #define KICK_SLAVE(thd) do { \
pthread_mutex_lock(&(thd)->LOCK_delete); \
(thd)->awake(THD::NOT_KILLED); \
pthread_mutex_unlock(&(thd)->LOCK_delete); \
} while(0)
int start_slave(THD* thd, MASTER_INFO* mi, bool net_report); int start_slave(THD* thd, MASTER_INFO* mi, bool net_report);
int stop_slave(THD* thd, MASTER_INFO* mi, bool net_report); int stop_slave(THD* thd, MASTER_INFO* mi, bool net_report);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment