Commit 706c365e authored by unknown's avatar unknown

MDEV-26: Global transaction id. Partial commit.

parent ea8f1ca9
......@@ -5278,6 +5278,67 @@ MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
DBUG_RETURN(error);
}
/*
  Generate a new global transaction ID (GTID) and write it to the binlog as
  a Gtid_log_event.

  The sequence number is taken from the session variable gtid_seq_no when it
  is non-zero; otherwise a fresh one is allocated from global_gtid_counter.
  The session variable is always reset to 0 afterwards.

  @param thd               Session supplying gtid_seq_no and gtid_domain_id;
                           its binlog_bytes_written status counter is bumped.
  @param standalone        Forwarded to the Gtid_log_event constructor.
  @param is_transactional  Forwarded to the Gtid_log_event constructor.

  @retval false  The event was written and the replication state updated.
  @retval true   Writing the event failed; the replication state is left
                 untouched (the sequence counter has still been advanced).
*/
bool
MYSQL_BIN_LOG::write_gtid_event(THD *thd, bool standalone,
                                bool is_transactional)
{
  rpl_gtid gtid;
  uint64 seq_no= thd->variables.gtid_seq_no;

  /*
    Reset the session variable gtid_seq_no, to reduce the risk of accidentally
    producing a duplicate GTID.
  */
  thd->variables.gtid_seq_no= 0;

  /* Both the read and the update of the counter happen under one lock. */
  mysql_mutex_lock(&LOCK_gtid_counter);
  if (seq_no == 0)
  {
    /* No sequence number was supplied: allocate the next local one. */
    seq_no= ++global_gtid_counter;
  }
  else if (global_gtid_counter < seq_no)
  {
    /*
      If we see a higher sequence number, use that one as the basis of any
      later generated sequence numbers.

      This way, in simple tree replication topologies with just one master
      generating events at any point in time, sequence number will always be
      monotonic irrespective of server_id. Only if events are produced in
      parallel on multiple master servers will sequence id be non-monotonic
      and server id needed to distinguish.

      We will not rely on this in the server code, but it makes things
      conceptually easier to understand for the DBA.
    */
    global_gtid_counter= seq_no;
  }
  mysql_mutex_unlock(&LOCK_gtid_counter);

  gtid.seq_no= seq_no;
  gtid.domain_id= thd->variables.gtid_domain_id;

  Gtid_log_event gtid_event(thd, gtid.seq_no, gtid.domain_id, standalone,
                            LOG_EVENT_SUPPRESS_USE_F, is_transactional);
  /* The server id is whatever the event object ended up with. */
  gtid.server_id= gtid_event.server_id;

  /*
    Write the event to the binary log. Use this object's own log_file, as the
    other writers in this class do, rather than reaching for the global
    mysql_bin_log instance.
  */
  if (gtid_event.write(&log_file))
    return true;
  status_var_add(thd->status_var.binlog_bytes_written, gtid_event.data_written);

  /*
    Update the replication state (last GTID in each replication domain).

    NOTE(review): rpl_state::update() returns int and the result is discarded
    here - confirm whether a failure should be reported to the caller.
  */
  mysql_mutex_lock(&LOCK_rpl_gtid_state);
  global_rpl_gtid_state.update(&gtid);
  mysql_mutex_unlock(&LOCK_rpl_gtid_state);

  return false;
}
/**
Write an event to the binary log. If with_annotate != NULL and
*with_annotate = TRUE write also Annotate_rows before the event
......@@ -5347,6 +5408,7 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info, my_bool *with_annotate)
my_org_b_tell= my_b_tell(file);
mysql_mutex_lock(&LOCK_log);
prev_binlog_id= current_binlog_id;
write_gtid_event(thd, true, using_trans);
}
else
{
......@@ -6219,19 +6281,6 @@ MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd,
break;
}
/*
Log "BEGIN" at the beginning of every transaction. Here, a transaction is
either a BEGIN..COMMIT block or a single statement in autocommit mode.
Create the necessary events here, where we have the correct THD (and
thread context).
Due to group commit the actual writing to binlog may happen in a different
thread.
*/
Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), using_trx_cache, TRUE,
TRUE, 0);
entry.begin_event= &qinfo;
entry.end_event= end_ev;
if (cache_mngr->stmt_cache.has_incident() ||
cache_mngr->trx_cache.has_incident())
......@@ -6607,10 +6656,8 @@ MYSQL_BIN_LOG::write_transaction_or_stmt(group_commit_entry *entry)
{
binlog_cache_mngr *mngr= entry->cache_mngr;
if (entry->begin_event->write(&log_file))
if (write_gtid_event(entry->thd, false, entry->using_trx_cache))
return ER_ERROR_ON_WRITE;
status_var_add(entry->thd->status_var.binlog_bytes_written,
entry->begin_event->data_written);
if (entry->using_stmt_cache && !mngr->stmt_cache.empty() &&
write_cache(entry->thd, mngr->get_binlog_cache_log(FALSE)))
......
......@@ -420,11 +420,10 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
bool using_stmt_cache;
bool using_trx_cache;
/*
Extra events (BEGIN, COMMIT/ROLLBACK/XID, and possibly INCIDENT) to be
Extra events (COMMIT/ROLLBACK/XID, and possibly INCIDENT) to be
written during group commit. The incident_event is only valid if
trx_data->has_incident() is true.
*/
Log_event *begin_event;
Log_event *end_event;
Log_event *incident_event;
/* Set during group commit to record any per-thread error. */
......@@ -771,6 +770,7 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
inline IO_CACHE *get_index_file() { return &index_file;}
inline uint32 get_open_count() { return open_count; }
void set_status_variables(THD *thd);
bool write_gtid_event(THD *thd, bool standalone, bool is_transactional);
};
class Log_event_handler
......
This diff is collapsed.
......@@ -260,6 +260,8 @@ struct sql_ex_info
#define HEARTBEAT_HEADER_LEN 0
#define ANNOTATE_ROWS_HEADER_LEN 0
#define BINLOG_CHECKPOINT_HEADER_LEN 4
#define GTID_HEADER_LEN 19
#define GTID_LIST_HEADER_LEN 4
/*
Max number of possible extra bytes in a replication event compared to a
......@@ -599,16 +601,13 @@ enum enum_binlog_checksum_alg {
because they mis-compute the offsets into the master's binlog).
*/
#define MARIA_SLAVE_CAPABILITY_TOLERATE_HOLES 2
/* MariaDB > 5.5, which knows about binlog_checkpoint_log_event. */
/* MariaDB >= 10.0, which knows about binlog_checkpoint_log_event. */
#define MARIA_SLAVE_CAPABILITY_BINLOG_CHECKPOINT 3
/*
MariaDB server which understands MySQL 5.6 ignorable events. This server
can tolerate receiving any event with the LOG_EVENT_IGNORABLE_F flag set.
*/
#define MARIA_SLAVE_CAPABILITY_IGNORABLE 4
/* MariaDB >= 10.0.1, which knows about global transaction id events. */
#define MARIA_SLAVE_CAPABILITY_GTID 4
/* Our capability. */
#define MARIA_SLAVE_CAPABILITY_MINE MARIA_SLAVE_CAPABILITY_BINLOG_CHECKPOINT
#define MARIA_SLAVE_CAPABILITY_MINE MARIA_SLAVE_CAPABILITY_GTID
/**
......@@ -694,6 +693,18 @@ enum Log_event_type
that are prepared in storage engines but not yet committed.
*/
BINLOG_CHECKPOINT_EVENT= 161,
/*
Gtid event. For global transaction ID, used to start a new event group,
instead of the old BEGIN query event, and also to mark stand-alone
events.
*/
GTID_EVENT= 162,
/*
Gtid list event. Logged at the start of every binlog, to record the
current replication state. This consists of the last GTID seen for
each replication domain.
*/
GTID_LIST_EVENT= 163,
/* Add new MariaDB events here - right above this comment! */
......@@ -766,6 +777,11 @@ typedef struct st_print_event_info
uint charset_database_number;
uint thread_id;
bool thread_id_printed;
uint32 server_id;
bool server_id_printed;
uint32 domain_id;
bool domain_id_printed;
/*
Track when @@skip_replication changes so we need to output a SET
statement for it.
......@@ -1874,6 +1890,7 @@ class Query_log_event: public Log_event
}
Log_event_type get_type_code() { return QUERY_EVENT; }
static int dummy_event(String *packet, ulong ev_offset, uint8 checksum_alg);
static int begin_event(String *packet, ulong ev_offset, uint8 checksum_alg);
#ifdef MYSQL_SERVER
bool write(IO_CACHE* file);
virtual bool write_post_header_for_derived(IO_CACHE* file) { return FALSE; }
......@@ -2927,6 +2944,210 @@ class Binlog_checkpoint_log_event: public Log_event
#endif
};
/*
  One global transaction id, made up of the triple
  (domain_id, server_id, seq_no).
*/
struct rpl_gtid
{
/* Replication domain id. */
uint32 domain_id;
/* Server id of the GTID. */
uint32 server_id;
/* Sequence number of the GTID. */
uint64 seq_no;
};
/*
  Replication state: a hash of GTIDs. MYSQL_BIN_LOG::write_gtid_event()
  describes it as holding the last GTID in each replication domain.
*/
struct rpl_state
{
/* Underlying container; count() reports its number of records. */
HASH hash;
rpl_state();
~rpl_state();
/* Number of entries currently stored in the hash. */
ulong count() const { return hash.records; }
/*
  Record the given GTID in the state.
  NOTE(review): the meaning of the int return value is not visible from this
  declaration - presumably non-zero on failure; confirm in the definition.
*/
int update(const struct rpl_gtid *gtid);
};
/*
  Server-wide replication state instance. write_gtid_event() updates it while
  holding LOCK_rpl_gtid_state.
*/
extern rpl_state global_rpl_gtid_state;
/**
@class Gtid_log_event
This event is logged as part of every event group to give the global
transaction id (GTID) of that group.
It replaces the BEGIN query event used in earlier versions to begin most
event groups, but is also used for events that used to be stand-alone.
@section Gtid_log_event_binary_format Binary Format
The binary format for Gtid_log_event has 6 extra reserved bytes to make the
length a total of 19 bytes (+ 19 bytes of header in common with all events).
This is just the minimal size for a BEGIN query event, which makes it easy
to replace this event with such BEGIN event to remain compatible with old
slave servers.
<table>
<caption>Post-Header</caption>
<tr>
<th>Name</th>
<th>Format</th>
<th>Description</th>
</tr>
<tr>
<td>seq_no</td>
<td>8 byte unsigned integer</td>
<td>increasing id within one server_id. Starts at 1, holes in the sequence
may occur</td>
</tr>
<tr>
<td>domain_id</td>
<td>4 byte unsigned integer</td>
<td>Replication domain id, identifying independent replication streams</td>
</tr>
<tr>
<td>flags</td>
<td>1 byte bitfield</td>
<td>Bit 0 set indicates stand-alone event (no terminating COMMIT)</td>
</tr>
<tr>
<td>Reserved</td>
<td>6 bytes</td>
<td>Reserved bytes, set to 0. May be used for future expansion.</td>
</tr>
</table>
The Body of Gtid_log_event is empty. The total event size is 19 bytes +
the normal 19 bytes common-header.
*/
class Gtid_log_event: public Log_event
{
public:
/* Sequence number of the GTID; 0 makes is_valid() report the event invalid. */
uint64 seq_no;
/* Replication domain id of the GTID. */
uint32 domain_id;
/* Flag byte; presumably holds the FL_* bits defined below. */
uchar flags2;
/* Flags2. */
/* FL_STANDALONE is set when there is no terminating COMMIT event. */
static const uchar FL_STANDALONE= 1;
#ifdef MYSQL_SERVER
/*
  Server-side constructor, used by MYSQL_BIN_LOG::write_gtid_event() to
  build the event before writing it to the binlog.
*/
Gtid_log_event(THD *thd_arg, uint64 seq_no, uint32 domain_id, bool standalone,
uint16 flags, bool is_transactional);
#ifdef HAVE_REPLICATION
void pack_info(THD *thd, Protocol *protocol);
virtual int do_apply_event(Relay_log_info const *rli);
virtual int do_update_pos(Relay_log_info *rli);
virtual enum_skip_reason do_shall_skip(Relay_log_info *rli);
#endif
#else
/* Only declared when MYSQL_SERVER is not defined. */
void print(FILE *file, PRINT_EVENT_INFO *print_event_info);
#endif
/* Constructor taking a raw event buffer and the format description. */
Gtid_log_event(const char *buf, uint event_len,
const Format_description_log_event *description_event);
~Gtid_log_event() { }
Log_event_type get_type_code() { return GTID_EVENT; }
/* Always GTID_HEADER_LEN: the event has a post-header but an empty body. */
int get_data_size() { return GTID_HEADER_LEN; }
/* An event is considered valid as soon as it carries a non-zero seq_no. */
bool is_valid() const { return seq_no != 0; }
#ifdef MYSQL_SERVER
bool write(IO_CACHE *file);
/*
  Used by send_event_to_slave() for slaves that do not understand GTID
  events; a non-zero return is treated there as a corrupt-event error.
  need_dummy_event is read by the caller again after the call.
*/
static int make_compatible_event(String *packet, bool *need_dummy_event,
ulong ev_offset, uint8 checksum_alg);
#endif
};
/**
@class Gtid_list_log_event
This event is logged at the start of every binlog file to record the
current replication state: the last global transaction id (GTID) applied
on the server within each replication domain.
It consists of a list of GTIDs, one for each replication domain ever seen
on the server.
@section Gtid_list_log_event_binary_format Binary Format
<table>
<caption>Post-Header</caption>
<tr>
<th>Name</th>
<th>Format</th>
<th>Description</th>
</tr>
<tr>
<td>count</td>
<td>4 byte unsigned integer</td>
<td>The lower 28 bits are the number of GTIDs. The upper 4 bits are
reserved for flags bits for future expansion</td>
</tr>
</table>
<table>
<caption>Body</caption>
<tr>
<th>Name</th>
<th>Format</th>
<th>Description</th>
</tr>
<tr>
<td>domain_id</td>
<td>4 byte unsigned integer</td>
<td>Replication domain id of one GTID</td>
</tr>
<tr>
<td>server_id</td>
<td>4 byte unsigned integer</td>
<td>Server id of one GTID</td>
</tr>
<tr>
<td>seq_no</td>
<td>8 byte unsigned integer</td>
<td>sequence number of one GTID</td>
</tr>
</table>
The three elements in the body repeat COUNT times to form the GTID list.
*/
class Gtid_list_log_event: public Log_event
{
public:
/* Number of GTIDs in the list. */
uint32 count;
/*
  The GTID list; released with my_free() in the destructor. NULL makes
  is_valid() report the event invalid.
  NOTE(review): the class frees this pointer but declares no copy
  constructor or assignment operator - confirm events are never copied.
*/
struct rpl_gtid *list;
/* Serialized size of one list element: domain_id + server_id + seq_no. */
static const uint element_size= 4+4+8;
#ifdef MYSQL_SERVER
/* Server-side constructor taking a replication state. */
Gtid_list_log_event(rpl_state *gtid_set);
#ifdef HAVE_REPLICATION
void pack_info(THD *thd, Protocol *protocol);
#endif
#else
/* Only declared when MYSQL_SERVER is not defined. */
void print(FILE *file, PRINT_EVENT_INFO *print_event_info);
#endif
/* Constructor taking a raw event buffer and the format description. */
Gtid_list_log_event(const char *buf, uint event_len,
const Format_description_log_event *description_event);
~Gtid_list_log_event() { my_free(list); }
Log_event_type get_type_code() { return GTID_LIST_EVENT; }
/*
  Post-header plus count elements of element_size bytes each.
  NOTE(review): the sum is computed in unsigned arithmetic and returned as
  int - confirm count is bounded so that this cannot wrap.
*/
int get_data_size() { return GTID_LIST_HEADER_LEN + count*element_size; }
bool is_valid() const { return list != NULL; }
#ifdef MYSQL_SERVER
bool write(IO_CACHE *file);
#endif
};
/* the classes below are for the new LOAD DATA INFILE logging */
/**
......
......@@ -675,6 +675,8 @@ mysql_mutex_t
mysql_mutex_t LOCK_stats, LOCK_global_user_client_stats,
LOCK_global_table_stats, LOCK_global_index_stats;
mysql_mutex_t LOCK_gtid_counter, LOCK_rpl_gtid_state;
/**
The below lock protects access to two global server variables:
max_prepared_stmt_count and prepared_stmt_count. These variables
......@@ -770,6 +772,8 @@ PSI_mutex_key key_LOCK_stats,
key_LOCK_global_index_stats,
key_LOCK_wakeup_ready;
PSI_mutex_key key_LOCK_gtid_counter, key_LOCK_rpl_gtid_state;
PSI_mutex_key key_LOCK_prepare_ordered, key_LOCK_commit_ordered;
static PSI_mutex_info all_server_mutexes[]=
......@@ -813,6 +817,8 @@ static PSI_mutex_info all_server_mutexes[]=
{ &key_LOCK_global_table_stats, "LOCK_global_table_stats", PSI_FLAG_GLOBAL},
{ &key_LOCK_global_index_stats, "LOCK_global_index_stats", PSI_FLAG_GLOBAL},
{ &key_LOCK_wakeup_ready, "THD::LOCK_wakeup_ready", 0},
{ &key_LOCK_gtid_counter, "LOCK_gtid_counter", PSI_FLAG_GLOBAL},
{ &key_LOCK_rpl_gtid_state, "LOCK_rpl_gtid_state", PSI_FLAG_GLOBAL},
{ &key_LOCK_thd_data, "THD::LOCK_thd_data", 0},
{ &key_LOCK_user_conn, "LOCK_user_conn", PSI_FLAG_GLOBAL},
{ &key_LOCK_uuid_short_generator, "LOCK_uuid_short_generator", PSI_FLAG_GLOBAL},
......@@ -1279,6 +1285,12 @@ struct st_VioSSLFd *ssl_acceptor_fd;
*/
uint connection_count= 0, extra_connection_count= 0;
/**
Running counter for generating new GTIDs locally.
MYSQL_BIN_LOG::write_gtid_event() reads and updates it while holding
LOCK_gtid_counter: it is pre-incremented to allocate a new sequence number,
and raised to any larger sequence number supplied through gtid_seq_no.
*/
uint64 global_gtid_counter= 0;
/* Function declarations */
pthread_handler_t signal_hand(void *arg);
......@@ -1940,6 +1952,8 @@ static void clean_up_mutexes()
mysql_mutex_destroy(&LOCK_global_user_client_stats);
mysql_mutex_destroy(&LOCK_global_table_stats);
mysql_mutex_destroy(&LOCK_global_index_stats);
mysql_mutex_destroy(&LOCK_gtid_counter);
mysql_mutex_destroy(&LOCK_rpl_gtid_state);
#ifdef HAVE_OPENSSL
mysql_mutex_destroy(&LOCK_des_key_file);
#ifndef HAVE_YASSL
......@@ -4002,6 +4016,10 @@ static int init_thread_environment()
&LOCK_global_table_stats, MY_MUTEX_INIT_FAST);
mysql_mutex_init(key_LOCK_global_index_stats,
&LOCK_global_index_stats, MY_MUTEX_INIT_FAST);
mysql_mutex_init(key_LOCK_gtid_counter,
&LOCK_gtid_counter, MY_MUTEX_INIT_FAST);
mysql_mutex_init(key_LOCK_rpl_gtid_state,
&LOCK_rpl_gtid_state, MY_MUTEX_INIT_SLOW);
mysql_mutex_init(key_LOCK_prepare_ordered, &LOCK_prepare_ordered,
MY_MUTEX_INIT_SLOW);
mysql_mutex_init(key_LOCK_commit_ordered, &LOCK_commit_ordered,
......
......@@ -252,6 +252,8 @@ extern PSI_mutex_key key_LOCK_stats,
key_LOCK_global_user_client_stats, key_LOCK_global_table_stats,
key_LOCK_global_index_stats, key_LOCK_wakeup_ready;
extern PSI_mutex_key key_LOCK_gtid_counter, key_LOCK_rpl_gtid_state;
extern PSI_rwlock_key key_rwlock_LOCK_grant, key_rwlock_LOCK_logger,
key_rwlock_LOCK_sys_init_connect, key_rwlock_LOCK_sys_init_slave,
key_rwlock_LOCK_system_variables_hash, key_rwlock_query_cache_query_lock;
......@@ -341,6 +343,7 @@ extern mysql_mutex_t
LOCK_slave_list, LOCK_active_mi, LOCK_manager,
LOCK_global_system_variables, LOCK_user_conn,
LOCK_prepared_stmt_count, LOCK_error_messages, LOCK_connection_count;
extern mysql_mutex_t LOCK_gtid_counter, LOCK_rpl_gtid_state;
extern MYSQL_PLUGIN_IMPORT mysql_mutex_t LOCK_thread_count;
#ifdef HAVE_OPENSSL
extern mysql_mutex_t LOCK_des_key_file;
......@@ -546,6 +549,7 @@ inline int set_current_thd(THD *thd)
extern handlerton *maria_hton;
extern uint extra_connection_count;
extern uint64 global_gtid_counter;
extern my_bool opt_userstat_running, debug_assert_if_crashed_table;
extern uint mysqld_extra_port;
extern ulong opt_progress_report_time;
......
......@@ -639,7 +639,7 @@ bool check_master_connection_name(LEX_STRING *name)
file names without a prefix.
*/
void create_logfile_name_with_suffix(char *res_file_name, uint length,
void create_logfile_name_with_suffix(char *res_file_name, size_t length,
const char *info_file, bool append,
LEX_STRING *suffix)
{
......
......@@ -170,7 +170,7 @@ class Master_info_index
};
bool check_master_connection_name(LEX_STRING *name);
void create_logfile_name_with_suffix(char *res_file_name, uint length,
void create_logfile_name_with_suffix(char *res_file_name, size_t length,
const char *info_file,
bool append,
LEX_STRING *suffix);
......
......@@ -534,6 +534,12 @@ typedef struct system_variables
thread the query is being run to replicate temp tables properly
*/
my_thread_id pseudo_thread_id;
/**
When replicating an event group with GTID, keep these values around so
slave binlog can receive the same GTID as the original.
*/
uint32 gtid_domain_id;
uint64 gtid_seq_no;
/**
Place holders to store Multi-source variables in sys_var.cc during
update and show of variables.
......
......@@ -616,10 +616,34 @@ send_event_to_slave(THD *thd, NET *net, String* const packet, ushort flags,
}
/*
Do not send binlog checkpoint events to a slave that does not understand it.
Replace GTID events with old-style BEGIN events for slaves that do not
understand global transaction IDs. For stand-alone events, where there is
no terminating COMMIT query event, omit the GTID event or replace it with
a dummy event, as appropriate.
*/
if (unlikely(event_type == BINLOG_CHECKPOINT_EVENT) &&
mariadb_slave_capability < MARIA_SLAVE_CAPABILITY_BINLOG_CHECKPOINT)
if (event_type == GTID_EVENT &&
mariadb_slave_capability < MARIA_SLAVE_CAPABILITY_GTID)
{
bool need_dummy=
mariadb_slave_capability < MARIA_SLAVE_CAPABILITY_TOLERATE_HOLES;
bool err= Gtid_log_event::make_compatible_event(packet, &need_dummy,
ev_offset,
current_checksum_alg);
if (err)
return "Failed to replace GTID event with backwards-compatible event: "
"currupt event.";
if (!need_dummy)
return NULL;
}
/*
Do not send binlog checkpoint or gtid list events to a slave that does not
understand them.
*/
if ((unlikely(event_type == BINLOG_CHECKPOINT_EVENT) &&
mariadb_slave_capability < MARIA_SLAVE_CAPABILITY_BINLOG_CHECKPOINT) ||
(unlikely(event_type == GTID_LIST_EVENT) &&
mariadb_slave_capability < MARIA_SLAVE_CAPABILITY_GTID))
{
if (mariadb_slave_capability >= MARIA_SLAVE_CAPABILITY_TOLERATE_HOLES)
{
......
......@@ -1201,6 +1201,29 @@ static Sys_var_ulong Sys_pseudo_thread_id(
BLOCK_SIZE(1), NO_MUTEX_GUARD, IN_BINLOG,
ON_CHECK(check_has_super));
/*
  System variable gtid_domain_id, bound to system_variables::gtid_domain_id.
  MYSQL_BIN_LOG::write_gtid_event() copies its session value into every GTID
  it generates. Accepts 0..UINT_MAX32, defaults to 0, and can also be given
  on the command line. Changes go through check_has_super (presumably a
  SUPER privilege check - confirm).
*/
static Sys_var_uint Sys_gtid_domain_id(
"gtid_domain_id",
"Used with global transaction ID to identify logically independent "
"replication streams. When events can propagate through multiple "
"parallel paths (for example multiple masters), each independent "
"source server must use a distinct domain_id. For simple tree-shaped "
"replication topologies, it can be left at its default, 0.",
SESSION_VAR(gtid_domain_id),
CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, UINT_MAX32), DEFAULT(0),
BLOCK_SIZE(1), NO_MUTEX_GUARD, NOT_IN_BINLOG,
ON_CHECK(check_has_super));
/*
  Session-only system variable gtid_seq_no, bound to
  system_variables::gtid_seq_no; it has no command-line option.
  MYSQL_BIN_LOG::write_gtid_event() consumes the value for the next GTID it
  writes and resets it to 0, so a setting applies to one event group only.
  The default 0 means "generate a new sequence number". Changes go through
  check_has_super (presumably a SUPER privilege check - confirm).
*/
static Sys_var_ulonglong Sys_gtid_seq_no(
"gtid_seq_no",
"Internal server usage, for replication with global transaction id. "
"When set, next event group logged to the binary log will use this "
"sequence number, not generate a new one, thus allowing to preserve "
"master's GTID in slave's binlog.",
SESSION_ONLY(gtid_seq_no),
NO_CMD_LINE, VALID_RANGE(0, ULONGLONG_MAX), DEFAULT(0),
BLOCK_SIZE(1), NO_MUTEX_GUARD, NOT_IN_BINLOG,
ON_CHECK(check_has_super));
static bool fix_max_join_size(sys_var *self, THD *thd, enum_var_type type)
{
SV *sv= type == OPT_GLOBAL ? &global_system_variables : &thd->variables;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment