slave.cc 137 KB
Newer Older
1
/* Copyright (C) 2000-2003 MySQL AB
2

bk@work.mysql.com's avatar
bk@work.mysql.com committed
3 4
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6

bk@work.mysql.com's avatar
bk@work.mysql.com committed
7 8 9 10
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
11

bk@work.mysql.com's avatar
bk@work.mysql.com committed
12 13 14 15
   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

16 17 18 19 20 21 22 23 24 25 26

/**
  @addtogroup Replication
  @{

  @file

  @brief Code to run the io thread and the sql thread on the
  replication slave.
*/

bk@work.mysql.com's avatar
bk@work.mysql.com committed
27
#include "mysql_priv.h"
hf@deer.mysql.r18.ru's avatar
SCRUM  
hf@deer.mysql.r18.ru committed
28

bk@work.mysql.com's avatar
bk@work.mysql.com committed
29
#include <mysql.h>
30
#include <myisam.h>
31
#include "slave.h"
32 33
#include "rpl_mi.h"
#include "rpl_rli.h"
34
#include "sql_repl.h"
lars@mysql.com's avatar
lars@mysql.com committed
35
#include "rpl_filter.h"
36
#include "repl_failsafe.h"
bk@work.mysql.com's avatar
bk@work.mysql.com committed
37
#include <thr_alarm.h>
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
38
#include <my_dir.h>
hf@deer.(none)'s avatar
hf@deer.(none) committed
39
#include <sql_common.h>
40
#include <errmsg.h>
41
#include <mysys_err.h>
hf@deer.mysql.r18.ru's avatar
SCRUM  
hf@deer.mysql.r18.ru committed
42

43 44 45 46
#ifdef HAVE_REPLICATION

#include "rpl_tblmap.h"

47
#define FLAGSTR(V,F) ((V)&(F)?#F" ":"")
48

49
#define MAX_SLAVE_RETRY_PAUSE 5
50 51 52
bool use_slave_mask = 0;
MY_BITMAP slave_error_mask;

53 54
typedef bool (*CHECK_KILLED_FUNC)(THD*,void*);

55
char* slave_load_tmpdir = 0;
56
Master_info *active_mi= 0;
57
my_bool replicate_same_server_id;
58
ulonglong relay_log_space_limit = 0;
59 60 61 62 63

/*
  When slave thread exits, we need to remember the temporary tables so we
  can re-use them on slave start.

64
  TODO: move the vars below under Master_info
65
*/
66

67
int disconnect_slave_event_count = 0, abort_slave_event_count = 0;
68
int events_till_abort = -1;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
69

70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121
enum enum_slave_reconnect_actions
{
  SLAVE_RECON_ACT_REG= 0,
  SLAVE_RECON_ACT_DUMP= 1,
  SLAVE_RECON_ACT_EVENT= 2,
  SLAVE_RECON_ACT_MAX
};

enum enum_slave_reconnect_messages
{
  SLAVE_RECON_MSG_WAIT= 0,
  SLAVE_RECON_MSG_KILLED_WAITING= 1,
  SLAVE_RECON_MSG_AFTER= 2,
  SLAVE_RECON_MSG_FAILED= 3,
  SLAVE_RECON_MSG_COMMAND= 4,
  SLAVE_RECON_MSG_KILLED_AFTER= 5,
  SLAVE_RECON_MSG_MAX
};

static const char *reconnect_messages[SLAVE_RECON_ACT_MAX][SLAVE_RECON_MSG_MAX]=
{
  {
    "Waiting to reconnect after a failed registration on master",
    "Slave I/O thread killed while waitnig to reconnect after a failed \
registration on master",
    "Reconnecting after a failed registration on master",
    "failed registering on master, reconnecting to try again, \
log '%s' at postion %s",
    "COM_REGISTER_SLAVE",
    "Slave I/O thread killed during or after reconnect"
  },
  {
    "Waiting to reconnect after a failed binlog dump request",
    "Slave I/O thread killed while retrying master dump",
    "Reconnecting after a failed binlog dump request",
    "failed dump request, reconnecting to try again, log '%s' at postion %s",
    "COM_BINLOG_DUMP",
    "Slave I/O thread killed during or after reconnect"
  },
  {
    "Waiting to reconnect after a failed master event read",
    "Slave I/O thread killed while waiting to reconnect after a failed read",
    "Reconnecting after a failed master event read",
    "Slave I/O thread: Failed reading log event, reconnecting to retry, \
log '%s' at postion %s",
    "",
    "Slave I/O thread killed during or after a reconnect done to recover from \
failed read"
  }
};
 

122
typedef enum { SLAVE_THD_IO, SLAVE_THD_SQL} SLAVE_THD_TYPE;
123

124 125
static int process_io_rotate(Master_info* mi, Rotate_log_event* rev);
static int process_io_create_file(Master_info* mi, Create_file_log_event* cev);
126
static bool wait_for_relay_log_space(Relay_log_info* rli);
127
static inline bool io_slave_killed(THD* thd,Master_info* mi);
128
static inline bool sql_slave_killed(THD* thd,Relay_log_info* rli);
129
static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type);
130 131
static int safe_connect(THD* thd, MYSQL* mysql, Master_info* mi);
static int safe_reconnect(THD* thd, MYSQL* mysql, Master_info* mi,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
132
                          bool suppress_warnings);
133
static int connect_to_master(THD* thd, MYSQL* mysql, Master_info* mi,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
134
                             bool reconnect, bool suppress_warnings);
135
static int safe_sleep(THD* thd, int sec, CHECK_KILLED_FUNC thread_killed,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
136
                      void* thread_killed_arg);
137
static int request_table_dump(MYSQL* mysql, const char* db, const char* table);
138
static int create_table_from_dump(THD* thd, MYSQL *mysql, const char* db,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
139
                                  const char* table_name, bool overwrite);
140
static int get_master_version_and_clock(MYSQL* mysql, Master_info* mi);
141
static Log_event* next_event(Relay_log_info* rli);
142
static int queue_event(Master_info* mi,const char* buf,ulong event_len);
143 144 145 146 147
static int terminate_slave_thread(THD *thd,
                                  pthread_mutex_t* term_lock,
                                  pthread_cond_t* term_cond,
                                  volatile uint *slave_running,
                                  bool skip_lock);
148
static bool check_io_slave_killed(THD *thd, Master_info *mi, const char *info);
149 150

/*
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
151
  Find out which replications threads are running
152

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
153 154
  SYNOPSIS
    init_thread_mask()
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
155 156 157
    mask                Return value here
    mi                  master_info for slave
    inverse             If set, returns which threads are not running
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
158 159 160 161 162 163

  IMPLEMENTATION
    Get a bit mask for which threads are running so that we can later restart
    these threads.

  RETURN
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
164 165
    mask        If inverse == 0, running threads
                If inverse == 1, stopped threads
166 167
*/

168
void init_thread_mask(int* mask,Master_info* mi,bool inverse)
169 170 171
{
  bool set_io = mi->slave_running, set_sql = mi->rli.slave_running;
  register int tmp_mask=0;
172 173
  DBUG_ENTER("init_thread_mask");

174 175 176 177
  if (set_io)
    tmp_mask |= SLAVE_IO;
  if (set_sql)
    tmp_mask |= SLAVE_SQL;
178 179
  if (inverse)
    tmp_mask^= (SLAVE_IO | SLAVE_SQL);
180
  *mask = tmp_mask;
181
  DBUG_VOID_RETURN;
182 183
}

184

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
185
/*
186
  lock_slave_threads()
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
187
*/
188

189
void lock_slave_threads(Master_info* mi)
190
{
191 192
  DBUG_ENTER("lock_slave_threads");

193 194 195
  //TODO: see if we can do this without dual mutex
  pthread_mutex_lock(&mi->run_lock);
  pthread_mutex_lock(&mi->rli.run_lock);
196
  DBUG_VOID_RETURN;
197 198
}

199

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
200
/*
201
  unlock_slave_threads()
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
202
*/
203

204
void unlock_slave_threads(Master_info* mi)
205
{
206 207
  DBUG_ENTER("unlock_slave_threads");

208 209 210
  //TODO: see if we can do this without dual mutex
  pthread_mutex_unlock(&mi->rli.run_lock);
  pthread_mutex_unlock(&mi->run_lock);
211
  DBUG_VOID_RETURN;
212 213
}

214

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
215
/* Initialize slave structures */
216

217 218
int init_slave()
{
219
  DBUG_ENTER("init_slave");
220

221 222 223 224 225 226
  /*
    This is called when mysqld starts. Before client connections are
    accepted. However bootstrap may conflict with us if it does START SLAVE.
    So it's safer to take the lock.
  */
  pthread_mutex_lock(&LOCK_active_mi);
227 228 229 230
  /*
    TODO: re-write this to interate through the list of files
    for multi-master
  */
231
  active_mi= new Master_info;
232 233

  /*
234 235 236
    If master_host is not specified, try to read it from the master_info file.
    If master_host is specified, create the master_info file if it doesn't
    exists.
237
  */
238 239 240 241 242
  if (!active_mi)
  {
    sql_print_error("Failed to allocate memory for the master info structure");
    goto err;
  }
243

monty@narttu.mysql.fi's avatar
monty@narttu.mysql.fi committed
244
  if (init_master_info(active_mi,master_info_file,relay_log_info_file,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
245
                       !master_host, (SLAVE_IO | SLAVE_SQL)))
246
  {
247
    sql_print_error("Failed to initialize the master info structure");
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
248
    goto err;
249
  }
250 251 252 253

  if (server_id && !master_host && active_mi->host[0])
    master_host= active_mi->host;

254 255
  /* If server id is not set, start_slave_thread() will say it */

256
  if (master_host && !opt_skip_slave_start)
257
  {
258
    if (start_slave_threads(1 /* need mutex */,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
259 260 261 262 263
                            0 /* no wait for start*/,
                            active_mi,
                            master_info_file,
                            relay_log_info_file,
                            SLAVE_IO | SLAVE_SQL))
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
264
    {
265
      sql_print_error("Failed to create slave threads");
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
266 267
      goto err;
    }
268
  }
269
  pthread_mutex_unlock(&LOCK_active_mi);
270
  DBUG_RETURN(0);
271

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
272
err:
273
  pthread_mutex_unlock(&LOCK_active_mi);
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
274
  DBUG_RETURN(1);
275
}
276

277

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
278
/*
279
  Init function to set up array for errors that should be skipped for slave
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
280

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
281 282
  SYNOPSIS
    init_slave_skip_errors()
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
283
    arg         List of errors numbers to skip, separated with ','
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
284 285 286 287

  NOTES
    Called from get_options() in mysqld.cc on start-up
*/
288

monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
289
void init_slave_skip_errors(const char* arg)
290
{
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
291
  const char *p;
292 293
  DBUG_ENTER("init_slave_skip_errors");

294
  if (bitmap_init(&slave_error_mask,0,MAX_SLAVE_ERROR,0))
295 296 297 298 299
  {
    fprintf(stderr, "Badly out of memory, please check your system status\n");
    exit(1);
  }
  use_slave_mask = 1;
300
  for (;my_isspace(system_charset_info,*arg);++arg)
301
    /* empty */;
302
  if (!my_strnncoll(system_charset_info,(uchar*)arg,4,(const uchar*)"all",4))
303 304
  {
    bitmap_set_all(&slave_error_mask);
305
    DBUG_VOID_RETURN;
306 307 308 309 310 311 312 313
  }
  for (p= arg ; *p; )
  {
    long err_code;
    if (!(p= str2int(p, 10, 0, LONG_MAX, &err_code)))
      break;
    if (err_code < MAX_SLAVE_ERROR)
       bitmap_set_bit(&slave_error_mask,(uint)err_code);
314
    while (!my_isdigit(system_charset_info,*p) && *p)
315 316
      p++;
  }
317
  DBUG_VOID_RETURN;
318 319
}

320

321
int terminate_slave_threads(Master_info* mi,int thread_mask,bool skip_lock)
322
{
323 324
  DBUG_ENTER("terminate_slave_threads");

325
  if (!mi->inited)
326
    DBUG_RETURN(0); /* successfully do nothing */
327 328
  int error,force_all = (thread_mask & SLAVE_FORCE_ALL);
  pthread_mutex_t *sql_lock = &mi->rli.run_lock, *io_lock = &mi->run_lock;
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
329

330
  if ((thread_mask & (SLAVE_IO|SLAVE_FORCE_ALL)))
331
  {
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
332
    DBUG_PRINT("info",("Terminating IO thread"));
333 334
    mi->abort_slave=1;
    if ((error=terminate_slave_thread(mi->io_thd,io_lock,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
335
                                      &mi->stop_cond,
336 337
                                      &mi->slave_running,
                                      skip_lock)) &&
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
338
        !force_all)
339
      DBUG_RETURN(error);
340
  }
341
  if ((thread_mask & (SLAVE_SQL|SLAVE_FORCE_ALL)))
342
  {
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
343
    DBUG_PRINT("info",("Terminating SQL thread"));
344 345
    mi->rli.abort_slave=1;
    if ((error=terminate_slave_thread(mi->rli.sql_thd,sql_lock,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
346
                                      &mi->rli.stop_cond,
347 348
                                      &mi->rli.slave_running,
                                      skip_lock)) &&
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
349
        !force_all)
350
      DBUG_RETURN(error);
351
  }
352
  DBUG_RETURN(0);
353 354
}

355

356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390
/**
   Wait for a slave thread to terminate.

   This function is called after requesting the thread to terminate
   (by setting @c abort_slave member of @c Relay_log_info or @c
   Master_info structure to 1). Termination of the thread is
   controlled with the the predicate <code>*slave_running</code>.

   Function will acquire @c term_lock before waiting on the condition
   unless @c skip_lock is true in which case the mutex should be owned
   by the caller of this function and will remain acquired after
   return from the function.

   @param term_lock
          Associated lock to use when waiting for @c term_cond

   @param term_cond
          Condition that is signalled when the thread has terminated

   @param slave_running
          Pointer to predicate to check for slave thread termination

   @param skip_lock
          If @c true the lock will not be acquired before waiting on
          the condition. In this case, it is assumed that the calling
          function acquires the lock before calling this function.

   @retval 0 All OK
 */
static int
terminate_slave_thread(THD *thd,
                       pthread_mutex_t* term_lock,
                       pthread_cond_t* term_cond,
                       volatile uint *slave_running,
                       bool skip_lock)
391
{
392 393
  int error;

394
  DBUG_ENTER("terminate_slave_thread");
395 396

  if (!skip_lock)
397
    pthread_mutex_lock(term_lock);
398 399 400 401 402 403

  safe_mutex_assert_owner(term_lock);

  if (!*slave_running)
  {
    if (!skip_lock)
404
      pthread_mutex_unlock(term_lock);
405
    DBUG_RETURN(ER_SLAVE_NOT_RUNNING);
406 407
  }
  DBUG_ASSERT(thd != 0);
408
  THD_CHECK_SENTRY(thd);
409

410
  /*
411
    Is is critical to test if the slave is running. Otherwise, we might
412
    be referening freed memory trying to kick it
413
  */
414

grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
415
  while (*slave_running)                        // Should always be true
416
  {
417
    DBUG_PRINT("loop", ("killing slave thread"));
418
    KICK_SLAVE(thd);
419 420 421
    /*
      There is a small chance that slave thread might miss the first
      alarm. To protect againts it, resend the signal until it reacts
422 423
    */
    struct timespec abstime;
424
    set_timespec(abstime,2);
425 426
    error= pthread_cond_timedwait(term_cond, term_lock, &abstime);
    DBUG_ASSERT(error == ETIMEDOUT || error == 0);
427
  }
428 429 430 431

  DBUG_ASSERT(*slave_running == 0);

  if (!skip_lock)
432
    pthread_mutex_unlock(term_lock);
433
  DBUG_RETURN(0);
434 435
}

436

437
int start_slave_thread(pthread_handler h_func, pthread_mutex_t *start_lock,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
438 439 440 441
                       pthread_mutex_t *cond_lock,
                       pthread_cond_t *start_cond,
                       volatile uint *slave_running,
                       volatile ulong *slave_run_id,
442
                       Master_info* mi,
443
                       bool high_priority)
444 445
{
  pthread_t th;
446 447 448
  ulong start_id;
  DBUG_ENTER("start_slave_thread");

449 450
  DBUG_ASSERT(mi->inited);

451 452 453 454 455 456 457 458 459
  if (start_lock)
    pthread_mutex_lock(start_lock);
  if (!server_id)
  {
    if (start_cond)
      pthread_cond_broadcast(start_cond);
    if (start_lock)
      pthread_mutex_unlock(start_lock);
    sql_print_error("Server id not set, will not start slave");
460
    DBUG_RETURN(ER_BAD_SLAVE);
461
  }
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
462

463
  if (*slave_running)
464 465 466 467 468
  {
    if (start_cond)
      pthread_cond_broadcast(start_cond);
    if (start_lock)
      pthread_mutex_unlock(start_lock);
469
    DBUG_RETURN(ER_SLAVE_MUST_STOP);
470
  }
471 472
  start_id= *slave_run_id;
  DBUG_PRINT("info",("Creating new slave thread"));
473 474
  if (high_priority)
    my_pthread_attr_setprio(&connection_attrib,CONNECT_PRIOR);
475 476 477 478
  if (pthread_create(&th, &connection_attrib, h_func, (void*)mi))
  {
    if (start_lock)
      pthread_mutex_unlock(start_lock);
479
    DBUG_RETURN(ER_SLAVE_THREAD);
480
  }
guilhem@mysql.com's avatar
guilhem@mysql.com committed
481
  if (start_cond && cond_lock) // caller has cond_lock
482 483
  {
    THD* thd = current_thd;
484
    while (start_id == *slave_run_id)
485
    {
486
      DBUG_PRINT("sleep",("Waiting for slave thread to start"));
487
      const char* old_msg = thd->enter_cond(start_cond,cond_lock,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
488
                                            "Waiting for slave thread to start");
489 490
      pthread_cond_wait(start_cond,cond_lock);
      thd->exit_cond(old_msg);
guilhem@mysql.com's avatar
guilhem@mysql.com committed
491
      pthread_mutex_lock(cond_lock); // re-acquire it as exit_cond() released
492
      if (thd->killed)
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
493
        DBUG_RETURN(thd->killed_errno());
494 495 496 497
    }
  }
  if (start_lock)
    pthread_mutex_unlock(start_lock);
498
  DBUG_RETURN(0);
499
}
500

501

502
/*
503
  start_slave_threads()
504

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
505 506 507 508
  NOTES
    SLAVE_FORCE_ALL is not implemented here on purpose since it does not make
    sense to do that for starting a slave--we always care if it actually
    started the threads that were not previously running
509
*/
510

511
int start_slave_threads(bool need_slave_mutex, bool wait_for_start,
512
                        Master_info* mi, const char* master_info_fname,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
513
                        const char* slave_info_fname, int thread_mask)
514 515 516 517
{
  pthread_mutex_t *lock_io=0,*lock_sql=0,*lock_cond_io=0,*lock_cond_sql=0;
  pthread_cond_t* cond_io=0,*cond_sql=0;
  int error=0;
518
  DBUG_ENTER("start_slave_threads");
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
519

520 521 522 523 524 525 526 527 528 529 530 531
  if (need_slave_mutex)
  {
    lock_io = &mi->run_lock;
    lock_sql = &mi->rli.run_lock;
  }
  if (wait_for_start)
  {
    cond_io = &mi->start_cond;
    cond_sql = &mi->rli.start_cond;
    lock_cond_io = &mi->run_lock;
    lock_cond_sql = &mi->rli.run_lock;
  }
532 533 534

  if (thread_mask & SLAVE_IO)
    error=start_slave_thread(handle_slave_io,lock_io,lock_cond_io,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
535 536 537
                             cond_io,
                             &mi->slave_running, &mi->slave_run_id,
                             mi, 1); //high priority, to read the most possible
538
  if (!error && (thread_mask & SLAVE_SQL))
539
  {
540
    error=start_slave_thread(handle_slave_sql,lock_sql,lock_cond_sql,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
541 542 543
                             cond_sql,
                             &mi->rli.slave_running, &mi->rli.slave_run_id,
                             mi, 0);
544 545 546
    if (error)
      terminate_slave_threads(mi, thread_mask & SLAVE_IO, 0);
  }
547
  DBUG_RETURN(error);
548
}
549

550

551
#ifdef NOT_USED_YET
552
static int end_slave_on_walk(Master_info* mi, uchar* /*unused*/)
553
{
554 555
  DBUG_ENTER("end_slave_on_walk");

556
  end_master_info(mi);
557
  DBUG_RETURN(0);
558
}
559
#endif
560

561

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
562 563 564 565 566 567
/*
  Free all resources used by slave

  SYNOPSIS
    end_slave()
*/
568

569 570
void end_slave()
{
571 572
  DBUG_ENTER("end_slave");

573 574 575 576 577 578 579 580
  /*
    This is called when the server terminates, in close_connections().
    It terminates slave threads. However, some CHANGE MASTER etc may still be
    running presently. If a START SLAVE was in progress, the mutex lock below
    will make us wait until slave threads have started, and START SLAVE
    returns, then we terminate them here.
  */
  pthread_mutex_lock(&LOCK_active_mi);
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
581 582 583 584 585 586 587 588 589 590 591 592
  if (active_mi)
  {
    /*
      TODO: replace the line below with
      list_walk(&master_list, (list_walk_action)end_slave_on_walk,0);
      once multi-master code is ready.
    */
    terminate_slave_threads(active_mi,SLAVE_FORCE_ALL);
    end_master_info(active_mi);
    delete active_mi;
    active_mi= 0;
  }
593
  pthread_mutex_unlock(&LOCK_active_mi);
594
  DBUG_VOID_RETURN;
595
}
596

597

598
static bool io_slave_killed(THD* thd, Master_info* mi)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
599
{
600 601
  DBUG_ENTER("io_slave_killed");

602
  DBUG_ASSERT(mi->io_thd == thd);
603
  DBUG_ASSERT(mi->slave_running); // tracking buffer overrun
604
  DBUG_RETURN(mi->abort_slave || abort_loop || thd->killed);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
605 606
}

607

608
static bool sql_slave_killed(THD* thd, Relay_log_info* rli)
609
{
610 611
  DBUG_ENTER("sql_slave_killed");

612 613
  DBUG_ASSERT(rli->sql_thd == thd);
  DBUG_ASSERT(rli->slave_running == 1);// tracking buffer overrun
614 615 616 617 618 619 620 621 622 623 624
  if (abort_loop || thd->killed || rli->abort_slave)
  {
    /*
      If we are in an unsafe situation (stopping could corrupt replication),
      we give one minute to the slave SQL thread of grace before really
      terminating, in the hope that it will be able to read more events and
      the unsafe situation will soon be left. Note that this one minute starts
      from the last time anything happened in the slave SQL thread. So it's
      really one minute of idleness, we don't timeout if the slave SQL thread
      is actively working.
    */
625
    if (rli->last_event_start_time == 0)
626
      DBUG_RETURN(1);
627 628
    DBUG_PRINT("info", ("Slave SQL thread is in an unsafe situation, giving "
                        "it some grace period"));
629
    if (difftime(time(0), rli->last_event_start_time) > 60)
630
    {
631 632 633 634 635 636 637
      rli->report(ERROR_LEVEL, 0,
                  "SQL thread had to stop in an unsafe situation, in "
                  "the middle of applying updates to a "
                  "non-transactional table without any primary key. "
                  "There is a risk of duplicate updates when the slave "
                  "SQL thread is restarted. Please check your tables' "
                  "contents after restart.");
638
      DBUG_RETURN(1);
639 640
    }
  }
641
  DBUG_RETURN(0);
642 643
}

644

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
645
/*
646 647
  skip_load_data_infile()

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
648 649 650
  NOTES
    This is used to tell a 3.23 master to break send_file()
*/
651 652 653

void skip_load_data_infile(NET *net)
{
654 655
  DBUG_ENTER("skip_load_data_infile");

656
  (void)net_request_file(net, "/dev/null");
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
657
  (void)my_net_read(net);                               // discard response
658
  (void)net_write_command(net, 0, (uchar*) "", 0, (uchar*) "", 0); // ok
659
  DBUG_VOID_RETURN;
660 661
}

662

663
bool net_request_file(NET* net, const char* fname)
664
{
665
  DBUG_ENTER("net_request_file");
666 667
  DBUG_RETURN(net_write_command(net, 251, (uchar*) fname, strlen(fname),
                                (uchar*) "", 0));
668 669
}

670 671
/*
  From other comments and tests in code, it looks like
672
  sometimes Query_log_event and Load_log_event can have db == 0
673 674 675
  (see rewrite_db() above for example)
  (cases where this happens are unclear; it may be when the master is 3.23).
*/
676 677

const char *print_slave_db_safe(const char* db)
678
{
679 680 681
  DBUG_ENTER("*print_slave_db_safe");

  DBUG_RETURN((db ? db : ""));
682
}
683

684
int init_strvar_from_file(char *var, int max_size, IO_CACHE *f,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
685
                                 const char *default_val)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
686
{
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
687
  uint length;
688 689
  DBUG_ENTER("init_strvar_from_file");

monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
690 691 692 693 694 695
  if ((length=my_b_gets(f,var, max_size)))
  {
    char* last_p = var + length -1;
    if (*last_p == '\n')
      *last_p = 0; // if we stopped on newline, kill it
    else
bk@work.mysql.com's avatar
bk@work.mysql.com committed
696
    {
697
      /*
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
698 699
        If we truncated a line or stopped on last char, remove all chars
        up to and including newline.
700
      */
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
701
      int c;
702
      while (((c=my_b_get(f)) != '\n' && c != my_b_EOF));
703
    }
704
    DBUG_RETURN(0);
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
705 706 707
  }
  else if (default_val)
  {
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
708
    strmake(var,  default_val, max_size-1);
709
    DBUG_RETURN(0);
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
710
  }
711
  DBUG_RETURN(1);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
712 713
}

714

715
int init_intvar_from_file(int* var, IO_CACHE* f, int default_val)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
716 717
{
  char buf[32];
718 719
  DBUG_ENTER("init_intvar_from_file");

grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
720 721

  if (my_b_gets(f, buf, sizeof(buf)))
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
722 723
  {
    *var = atoi(buf);
724
    DBUG_RETURN(0);
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
725
  }
726
  else if (default_val)
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
727 728
  {
    *var = default_val;
729
    DBUG_RETURN(0);
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
730
  }
731
  DBUG_RETURN(1);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
732 733
}

734 735 736 737 738 739 740 741
/*
  Note that we rely on the master's version (3.23, 4.0.14 etc) instead of
  relying on the binlog's version. This is not perfect: imagine an upgrade
  of the master without waiting that all slaves are in sync with the master;
  then a slave could be fooled about the binlog's format. This is what happens
  when people upgrade a 3.23 master to 4.0 without doing RESET MASTER: 4.0
  slaves are fooled. So we do this only to distinguish between 3.23 and more
  recent masters (it's too late to change things for 3.23).
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
742

743 744 745 746
  RETURNS
  0       ok
  1       error
*/
747

748
static int get_master_version_and_clock(MYSQL* mysql, Master_info* mi)
749
{
750
  const char* errmsg= 0;
751
  DBUG_ENTER("get_master_version_and_clock");
752 753 754 755 756 757 758

  /*
    Free old description_event_for_queue (that is needed if we are in
    a reconnection).
  */
  delete mi->rli.relay_log.description_event_for_queue;
  mi->rli.relay_log.description_event_for_queue= 0;
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
759

760
  if (!my_isdigit(&my_charset_bin,*mysql->server_version))
pem@mysql.comhem.se's avatar
pem@mysql.comhem.se committed
761
    errmsg = "Master reported unrecognized MySQL version";
762 763 764 765 766
  else
  {
    /*
      Note the following switch will bug when we have MySQL branch 30 ;)
    */
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
767
    switch (*mysql->server_version)
768 769 770 771 772 773 774 775
    {
    case '0':
    case '1':
    case '2':
      errmsg = "Master reported unrecognized MySQL version";
      break;
    case '3':
      mi->rli.relay_log.description_event_for_queue= new
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
776
        Format_description_log_event(1, mysql->server_version);
777 778 779
      break;
    case '4':
      mi->rli.relay_log.description_event_for_queue= new
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
780
        Format_description_log_event(3, mysql->server_version);
781
      break;
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
782
    default:
783 784 785 786 787 788 789 790 791
      /*
        Master is MySQL >=5.0. Give a default Format_desc event, so that we can
        take the early steps (like tests for "is this a 3.23 master") which we
        have to take before we receive the real master's Format_desc which will
        override this one. Note that the Format_desc we create below is garbage
        (it has the format of the *slave*); it's only good to help know if the
        master is 3.23, 4.0, etc.
      */
      mi->rli.relay_log.description_event_for_queue= new
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
792
        Format_description_log_event(4, mysql->server_version);
793 794
      break;
    }
795
  }
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
796 797

  /*
798 799 800 801 802
     This does not mean that a 5.0 slave will be able to read a 6.0 master; but
     as we don't know yet, we don't want to forbid this for now. If a 5.0 slave
     can't read a 6.0 master, this will show up when the slave can't read some
     events sent by the master, and there will be error messages.
  */
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
803

804 805 806
  if (errmsg)
  {
    sql_print_error(errmsg);
807
    DBUG_RETURN(1);
808
  }
809 810 811 812

  /* as we are here, we tried to allocate the event */
  if (!mi->rli.relay_log.description_event_for_queue)
  {
813 814 815
    mi->report(ERROR_LEVEL, ER_SLAVE_CREATE_EVENT_FAILURE,
               ER(ER_SLAVE_CREATE_EVENT_FAILURE),
               "default Format_description_log_event");
816
    DBUG_RETURN(1);
817 818
  }

819 820 821 822 823 824
  /*
    Compare the master and slave's clock. Do not die if master's clock is
    unavailable (very old master not supporting UNIX_TIMESTAMP()?).
  */
  MYSQL_RES *master_res= 0;
  MYSQL_ROW master_row;
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
825

826
  if (!mysql_real_query(mysql, STRING_WITH_LEN("SELECT UNIX_TIMESTAMP()")) &&
827 828
      (master_res= mysql_store_result(mysql)) &&
      (master_row= mysql_fetch_row(master_res)))
guilhem@gbichot2's avatar
guilhem@gbichot2 committed
829
  {
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
830
    mi->clock_diff_with_master=
831
      (long) (time((time_t*) 0) - strtoul(master_row[0], 0, 10));
guilhem@gbichot2's avatar
guilhem@gbichot2 committed
832
  }
833
  else if (!check_io_slave_killed(mi->io_thd, mi, NULL))
guilhem@gbichot2's avatar
guilhem@gbichot2 committed
834
  {
835
    mi->clock_diff_with_master= 0; /* The "most sensible" value */
836 837 838 839
    sql_print_warning("\"SELECT UNIX_TIMESTAMP()\" failed on master, "
                      "do not trust column Seconds_Behind_Master of SHOW "
                      "SLAVE STATUS. Error: %s (%d)",
                      mysql_error(mysql), mysql_errno(mysql));
840 841
  }
  if (master_res)
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
842 843
    mysql_free_result(master_res);

844 845 846 847 848 849 850 851 852 853
  /*
    Check that the master's server id and ours are different. Because if they
    are equal (which can result from a simple copy of master's datadir to slave,
    thus copying some my.cnf), replication will work but all events will be
    skipped.
    Do not die if SHOW VARIABLES LIKE 'SERVER_ID' fails on master (very old
    master?).
    Note: we could have put a @@SERVER_ID in the previous SELECT
    UNIX_TIMESTAMP() instead, but this would not have worked on 3.23 masters.
  */
854 855
  if (!mysql_real_query(mysql,
                        STRING_WITH_LEN("SHOW VARIABLES LIKE 'SERVER_ID'")) &&
856 857 858 859
      (master_res= mysql_store_result(mysql)))
  {
    if ((master_row= mysql_fetch_row(master_res)) &&
        (::server_id == strtoul(master_row[1], 0, 10)) &&
860
        !mi->rli.replicate_same_server_id)
861 862 863 864 865
      errmsg= "The slave I/O thread stops because master and slave have equal \
MySQL server ids; these ids must be different for replication to work (or \
the --replicate-same-server-id option must be used on slave but this does \
not always make sense; please check the manual before using it).";
    mysql_free_result(master_res);
guilhem@gbichot2's avatar
guilhem@gbichot2 committed
866 867
  }

868 869 870
  /*
    Check that the master's global character_set_server and ours are the same.
    Not fatal if query fails (old master?).
871 872 873 874 875 876
    Note that we don't check for equality of global character_set_client and
    collation_connection (neither do we prevent their setting in
    set_var.cc). That's because from what I (Guilhem) have tested, the global
    values of these 2 are never used (new connections don't use them).
    We don't test equality of global collation_database either as it's is
    going to be deprecated (made read-only) in 4.1 very soon.
877 878 879 880 881 882
    The test is only relevant if master < 5.0.3 (we'll test only if it's older
    than the 5 branch; < 5.0.3 was alpha...), as >= 5.0.3 master stores
    charset info in each binlog event.
    We don't do it for 3.23 because masters <3.23.50 hang on
    SELECT @@unknown_var (BUG#7965 - see changelog of 3.23.50). So finally we
    test only if master is 4.x.
883
  */
884 885 886

  /* redundant with rest of code but safer against later additions */
  if (*mysql->server_version == '3')
887
    goto err;
888 889

  if ((*mysql->server_version == '4') &&
890 891
      !mysql_real_query(mysql,
                        STRING_WITH_LEN("SELECT @@GLOBAL.COLLATION_SERVER")) &&
892
      (master_res= mysql_store_result(mysql)))
guilhem@gbichot2's avatar
guilhem@gbichot2 committed
893
  {
894 895 896 897 898 899
    if ((master_row= mysql_fetch_row(master_res)) &&
        strcmp(master_row[0], global_system_variables.collation_server->name))
      errmsg= "The slave I/O thread stops because master and slave have \
different values for the COLLATION_SERVER global variable. The values must \
be equal for replication to work";
    mysql_free_result(master_res);
guilhem@gbichot2's avatar
guilhem@gbichot2 committed
900
  }
901

902 903 904 905 906 907 908 909
  /*
    Perform analogous check for time zone. Theoretically we also should
    perform check here to verify that SYSTEM time zones are the same on
    slave and master, but we can't rely on value of @@system_time_zone
    variable (it is time zone abbreviation) since it determined at start
    time and so could differ for slave and master even if they are really
    in the same system time zone. So we are omiting this check and just
    relying on documentation. Also according to Monty there are many users
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
910 911 912
    who are using replication between servers in various time zones. Hence
    such check will broke everything for them. (And now everything will
    work for them because by default both their master and slave will have
913
    'SYSTEM' time zone).
914 915
    This check is only necessary for 4.x masters (and < 5.0.4 masters but
    those were alpha).
916
  */
917
  if ((*mysql->server_version == '4') &&
918
      !mysql_real_query(mysql, STRING_WITH_LEN("SELECT @@GLOBAL.TIME_ZONE")) &&
919
      (master_res= mysql_store_result(mysql)))
guilhem@gbichot2's avatar
guilhem@gbichot2 committed
920
  {
921
    if ((master_row= mysql_fetch_row(master_res)) &&
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
922
        strcmp(master_row[0],
923 924 925 926 927
               global_system_variables.time_zone->get_name()->ptr()))
      errmsg= "The slave I/O thread stops because master and slave have \
different values for the TIME_ZONE global variable. The values must \
be equal for replication to work";
    mysql_free_result(master_res);
guilhem@gbichot2's avatar
guilhem@gbichot2 committed
928 929
  }

930
err:
931 932 933
  if (errmsg)
  {
    sql_print_error(errmsg);
934
    DBUG_RETURN(1);
935
  }
936

937
  DBUG_RETURN(0);
938 939
}

940 941 942 943
/*
  Used by fetch_master_table (used by LOAD TABLE tblname FROM MASTER and LOAD
  DATA FROM MASTER). Drops the table (if 'overwrite' is true) and recreates it
  from the dump. Honours replication inclusion/exclusion rules.
944
  db must be non-zero (guarded by assertion).
945 946 947 948 949

  RETURN VALUES
    0           success
    1           error
*/
950

951
static int create_table_from_dump(THD* thd, MYSQL *mysql, const char* db,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
952
                                  const char* table_name, bool overwrite)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
953
{
954
  ulong packet_len;
955 956
  char *query, *save_db;
  uint32 save_db_length;
957 958
  Vio* save_vio;
  HA_CHECK_OPT check_opt;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
959
  TABLE_LIST tables;
960 961
  int error= 1;
  handler *file;
962
  ulonglong save_options;
963
  NET *net= &mysql->net;
964
  const char *found_semicolon= NULL;
965
  DBUG_ENTER("create_table_from_dump");
monty@narttu.mysql.fi's avatar
monty@narttu.mysql.fi committed
966

967
  packet_len= my_net_read(net); // read create table statement
968 969
  if (packet_len == packet_error)
  {
bell@sanja.is.com.ua's avatar
bell@sanja.is.com.ua committed
970
    my_message(ER_MASTER_NET_READ, ER(ER_MASTER_NET_READ), MYF(0));
monty@narttu.mysql.fi's avatar
monty@narttu.mysql.fi committed
971
    DBUG_RETURN(1);
972 973 974
  }
  if (net->read_pos[0] == 255) // error from master
  {
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
975
    char *err_msg;
976
    err_msg= (char*) net->read_pos + ((mysql->server_capabilities &
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
977 978
                                       CLIENT_PROTOCOL_41) ?
                                      3+SQLSTATE_LENGTH+1 : 3);
979
    my_error(ER_MASTER, MYF(0), err_msg);
monty@narttu.mysql.fi's avatar
monty@narttu.mysql.fi committed
980
    DBUG_RETURN(1);
981
  }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
982
  thd->command = COM_TABLE_DUMP;
983
  thd->query_length= packet_len;
984
  /* Note that we should not set thd->query until the area is initalized */
985
  if (!(query = thd->strmake((char*) net->read_pos, packet_len)))
986 987
  {
    sql_print_error("create_table_from_dump: out of memory");
bell@sanja.is.com.ua's avatar
bell@sanja.is.com.ua committed
988
    my_message(ER_GET_ERRNO, "Out of memory", MYF(0));
monty@narttu.mysql.fi's avatar
monty@narttu.mysql.fi committed
989
    DBUG_RETURN(1);
990
  }
991
  thd->query= query;
992
  thd->is_slave_error = 0;
993

monty@narttu.mysql.fi's avatar
monty@narttu.mysql.fi committed
994 995
  bzero((char*) &tables,sizeof(tables));
  tables.db = (char*)db;
996
  tables.alias= tables.table_name= (char*)table_name;
bell@sanja.is.com.ua's avatar
bell@sanja.is.com.ua committed
997

monty@narttu.mysql.fi's avatar
monty@narttu.mysql.fi committed
998
  /* Drop the table if 'overwrite' is true */
999
  if (overwrite)
monty@narttu.mysql.fi's avatar
monty@narttu.mysql.fi committed
1000
  {
1001 1002 1003 1004 1005 1006 1007 1008 1009 1010
    if (mysql_rm_table(thd,&tables,1,0)) /* drop if exists */
    {
      sql_print_error("create_table_from_dump: failed to drop the table");
      goto err;
    }
    else
    {
      /* Clear the OK result of mysql_rm_table(). */
      thd->main_da.reset_diagnostics_area();
    }
monty@narttu.mysql.fi's avatar
monty@narttu.mysql.fi committed
1011 1012
  }

1013
  /* Create the table. We do not want to log the "create table" statement */
1014
  save_options = thd->options;
1015 1016
  thd->options &= ~ (OPTION_BIN_LOG);
  thd_proc_info(thd, "Creating table from master dump");
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1017
  // save old db in case we are creating in a different database
1018
  save_db = thd->db;
1019
  save_db_length= thd->db_length;
1020
  thd->db = (char*)db;
1021
  DBUG_ASSERT(thd->db != 0);
1022
  thd->db_length= strlen(thd->db);
1023
  mysql_parse(thd, thd->query, packet_len, &found_semicolon); // run create table
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1024
  thd->db = save_db;            // leave things the way the were before
1025
  thd->db_length= save_db_length;
1026
  thd->options = save_options;
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1027

1028
  if (thd->is_slave_error)
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1029
    goto err;                   // mysql_parse took care of the error send
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1030

1031
  thd_proc_info(thd, "Opening master dump table");
1032
  thd->main_da.reset_diagnostics_area(); /* cleanup from CREATE_TABLE */
1033 1034 1035 1036 1037 1038
  /*
    Note: If this function starts to fail for MERGE tables,
    change the next two lines to these:
    tables.table= NULL; // was set by mysql_rm_table()
    if (!open_n_lock_single_table(thd, &tables, TL_WRITE))
  */
1039
  tables.lock_type = TL_WRITE;
1040
  if (!open_ltable(thd, &tables, TL_WRITE, 0))
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1041 1042
  {
    sql_print_error("create_table_from_dump: could not open created table");
1043
    goto err;
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1044
  }
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1045

1046
  file = tables.table->file;
1047
  thd_proc_info(thd, "Reading master dump table data");
1048
  /* Copy the data file */
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1049 1050
  if (file->net_read_dump(net))
  {
bell@sanja.is.com.ua's avatar
bell@sanja.is.com.ua committed
1051
    my_message(ER_MASTER_NET_READ, ER(ER_MASTER_NET_READ), MYF(0));
1052
    sql_print_error("create_table_from_dump: failed in\
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1053
 handler::net_read_dump()");
1054
    goto err;
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1055
  }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1056 1057

  check_opt.init();
serg@serg.mysql.com's avatar
serg@serg.mysql.com committed
1058
  check_opt.flags|= T_VERY_SILENT | T_CALC_CHECKSUM | T_QUICK;
1059
  thd_proc_info(thd, "Rebuilding the index on master dump table");
1060 1061 1062 1063 1064
  /*
    We do not want repair() to spam us with messages
    just send them to the error log, and report the failure in case of
    problems.
  */
1065
  save_vio = thd->net.vio;
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1066
  thd->net.vio = 0;
1067
  /* Rebuild the index file from the copied data file (with REPAIR) */
1068
  error=file->ha_repair(thd,&check_opt) != 0;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1069
  thd->net.vio = save_vio;
1070
  if (error)
1071
    my_error(ER_INDEX_REBUILD, MYF(0), tables.table->s->table_name.str);
1072 1073

err:
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1074
  close_thread_tables(thd);
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1075
  DBUG_RETURN(error);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1076 1077
}

1078

1079
int fetch_master_table(THD *thd, const char *db_name, const char *table_name,
1080
                       Master_info *mi, MYSQL *mysql, bool overwrite)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1081
{
1082 1083 1084 1085 1086
  int error= 1;
  const char *errmsg=0;
  bool called_connected= (mysql != NULL);
  DBUG_ENTER("fetch_master_table");
  DBUG_PRINT("enter", ("db_name: '%s'  table_name: '%s'",
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1087
                       db_name,table_name));
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1088

monty@work.mysql.com's avatar
merge  
monty@work.mysql.com committed
1089
  if (!called_connected)
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1090
  {
hf@deer.mysql.r18.ru's avatar
SCRUM  
hf@deer.mysql.r18.ru committed
1091
    if (!(mysql = mysql_init(NULL)))
1092 1093 1094
    {
      DBUG_RETURN(1);
    }
monty@work.mysql.com's avatar
merge  
monty@work.mysql.com committed
1095
    if (connect_to_master(thd, mysql, mi))
1096
    {
1097
      my_error(ER_CONNECT_TO_MASTER, MYF(0), mysql_error(mysql));
1098 1099 1100 1101 1102 1103 1104 1105 1106
      /*
        We need to clear the active VIO since, theoretically, somebody
        might issue an awake() on this thread.  If we are then in the
        middle of closing and destroying the VIO inside the
        mysql_close(), we will have a problem.
       */
#ifdef SIGNAL_WITH_VIO_CLOSE
      thd->clear_active_vio();
#endif
hf@deer.mysql.r18.ru's avatar
SCRUM  
hf@deer.mysql.r18.ru committed
1107
      mysql_close(mysql);
1108
      DBUG_RETURN(1);
1109
    }
1110 1111
    if (thd->killed)
      goto err;
1112
  }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1113

sasha@mysql.sashanet.com's avatar
sasha@mysql.sashanet.com committed
1114
  if (request_table_dump(mysql, db_name, table_name))
1115
  {
1116 1117
    error= ER_UNKNOWN_ERROR;
    errmsg= "Failed on table dump request";
1118 1119
    goto err;
  }
monty@narttu.mysql.fi's avatar
monty@narttu.mysql.fi committed
1120
  if (create_table_from_dump(thd, mysql, db_name,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1121
                             table_name, overwrite))
monty@narttu.mysql.fi's avatar
monty@narttu.mysql.fi committed
1122
    goto err;    // create_table_from_dump have sent the error already
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1123
  error = 0;
1124

bk@work.mysql.com's avatar
bk@work.mysql.com committed
1125
 err:
1126
  if (!called_connected)
hf@deer.mysql.r18.ru's avatar
SCRUM  
hf@deer.mysql.r18.ru committed
1127
    mysql_close(mysql);
1128
  if (errmsg && thd->vio_ok())
1129
    my_message(error, errmsg, MYF(0));
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1130
  DBUG_RETURN(test(error));                     // Return 1 on error
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1131
}
1132

1133

1134
static bool wait_for_relay_log_space(Relay_log_info* rli)
1135
{
1136
  bool slave_killed=0;
1137
  Master_info* mi = rli->mi;
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
1138
  const char *save_proc_info;
1139 1140
  THD* thd = mi->io_thd;
  DBUG_ENTER("wait_for_relay_log_space");
1141

1142
  pthread_mutex_lock(&rli->log_space_lock);
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
1143
  save_proc_info= thd->enter_cond(&rli->log_space_cond,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1144 1145
                                  &rli->log_space_lock,
                                  "\
1146
Waiting for the slave SQL thread to free enough relay log space");
1147
  while (rli->log_space_limit < rli->log_space_total &&
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1148
         !(slave_killed=io_slave_killed(thd,mi)) &&
1149
         !rli->ignore_log_space_limit)
1150
    pthread_cond_wait(&rli->log_space_cond, &rli->log_space_lock);
guilhem@mysql.com's avatar
guilhem@mysql.com committed
1151
  thd->exit_cond(save_proc_info);
1152 1153
  DBUG_RETURN(slave_killed);
}
1154

1155

1156 1157
/*
  Builds a Rotate from the ignored events' info and writes it to relay log.
1158

1159 1160 1161 1162 1163 1164 1165 1166 1167 1168
  SYNOPSIS
  write_ignored_events_info_to_relay_log()
    thd             pointer to I/O thread's thd
    mi

  DESCRIPTION
    Slave I/O thread, going to die, must leave a durable trace of the
    ignored events' end position for the use of the slave SQL thread, by
    calling this function. Only that thread can call it (see assertion).
 */
1169
static void write_ignored_events_info_to_relay_log(THD *thd, Master_info *mi)
1170
{
1171
  Relay_log_info *rli= &mi->rli;
1172
  pthread_mutex_t *log_lock= rli->relay_log.get_log_lock();
1173 1174
  DBUG_ENTER("write_ignored_events_info_to_relay_log");

1175 1176 1177
  DBUG_ASSERT(thd == mi->io_thd);
  pthread_mutex_lock(log_lock);
  if (rli->ign_master_log_name_end[0])
1178
  {
1179
    DBUG_PRINT("info",("writing a Rotate event to track down ignored events"));
1180
    Rotate_log_event *ev= new Rotate_log_event(rli->ign_master_log_name_end,
1181 1182 1183 1184 1185 1186 1187 1188 1189
                                               0, rli->ign_master_log_pos_end,
                                               Rotate_log_event::DUP_NAME);
    rli->ign_master_log_name_end[0]= 0;
    /* can unlock before writing as slave SQL thd will soon see our Rotate */
    pthread_mutex_unlock(log_lock);
    if (likely((bool)ev))
    {
      ev->server_id= 0; // don't be ignored by slave SQL thread
      if (unlikely(rli->relay_log.append(ev)))
1190 1191 1192 1193 1194
        mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE,
                   ER(ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
                   "failed to write a Rotate event"
                   " to the relay log, SHOW SLAVE STATUS may be"
                   " inaccurate");
1195
      rli->relay_log.harvest_bytes_written(&rli->log_space_total);
1196 1197
      if (flush_master_info(mi, 1))
        sql_print_error("Failed to flush master info file");
1198 1199 1200
      delete ev;
    }
    else
1201 1202 1203 1204
      mi->report(ERROR_LEVEL, ER_SLAVE_CREATE_EVENT_FAILURE,
                 ER(ER_SLAVE_CREATE_EVENT_FAILURE),
                 "Rotate_event (out of memory?),"
                 " SHOW SLAVE STATUS may be inaccurate");
1205
  }
1206 1207
  else
    pthread_mutex_unlock(log_lock);
1208
  DBUG_VOID_RETURN;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1209 1210
}

1211

1212
int register_slave_on_master(MYSQL* mysql, Master_info *mi,
1213
                             bool *suppress_warnings)
1214
{
1215
  uchar buf[1024], *pos= buf;
1216
  uint report_host_len, report_user_len=0, report_password_len=0;
1217
  DBUG_ENTER("register_slave_on_master");
1218

1219
  *suppress_warnings= FALSE;
1220
  if (!report_host)
1221
    DBUG_RETURN(0);
1222
  report_host_len= strlen(report_host);
1223
  if (report_user)
1224
    report_user_len= strlen(report_user);
1225
  if (report_password)
1226 1227 1228 1229
    report_password_len= strlen(report_password);
  /* 30 is a good safety margin */
  if (report_host_len + report_user_len + report_password_len + 30 >
      sizeof(buf))
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1230
    DBUG_RETURN(0);                                     // safety
1231 1232

  int4store(pos, server_id); pos+= 4;
1233 1234 1235
  pos= net_store_data(pos, (uchar*) report_host, report_host_len);
  pos= net_store_data(pos, (uchar*) report_user, report_user_len);
  pos= net_store_data(pos, (uchar*) report_password, report_password_len);
1236
  int2store(pos, (uint16) report_port); pos+= 2;
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1237
  int4store(pos, rpl_recovery_rank);    pos+= 4;
1238
  /* The master will fill in master_id */
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1239
  int4store(pos, 0);                    pos+= 4;
1240

1241
  if (simple_command(mysql, COM_REGISTER_SLAVE, buf, (size_t) (pos- buf), 0))
1242
  {
1243 1244 1245 1246
    if (mysql_errno(mysql) == ER_NET_READ_INTERRUPTED)
    {
      *suppress_warnings= TRUE;                 // Suppress reconnect warning
    }
1247
    else if (!check_io_slave_killed(mi->io_thd, mi, NULL))
1248 1249 1250 1251 1252 1253 1254
    {
      char buf[256];
      my_snprintf(buf, sizeof(buf), "%s (Errno: %d)", mysql_error(mysql), 
                  mysql_errno(mysql));
      mi->report(ERROR_LEVEL, ER_SLAVE_MASTER_COM_FAILURE,
                 ER(ER_SLAVE_MASTER_COM_FAILURE), "COM_REGISTER_SLAVE", buf);
    }
1255
    DBUG_RETURN(1);
1256
  }
1257
  DBUG_RETURN(0);
1258 1259
}

1260

1261
bool show_master_info(THD* thd, Master_info* mi)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1262
{
1263
  // TODO: fix this for multi-master
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1264
  List<Item> field_list;
1265 1266 1267
  Protocol *protocol= thd->protocol;
  DBUG_ENTER("show_master_info");

guilhem@gbichot2's avatar
guilhem@gbichot2 committed
1268
  field_list.push_back(new Item_empty_string("Slave_IO_State",
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1269
                                                     14));
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1270
  field_list.push_back(new Item_empty_string("Master_Host",
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1271
                                                     sizeof(mi->host)));
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1272
  field_list.push_back(new Item_empty_string("Master_User",
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1273
                                                     sizeof(mi->user)));
1274
  field_list.push_back(new Item_return_int("Master_Port", 7,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1275
                                           MYSQL_TYPE_LONG));
1276
  field_list.push_back(new Item_return_int("Connect_Retry", 10,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1277
                                           MYSQL_TYPE_LONG));
1278
  field_list.push_back(new Item_empty_string("Master_Log_File",
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1279
                                             FN_REFLEN));
1280
  field_list.push_back(new Item_return_int("Read_Master_Log_Pos", 10,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1281
                                           MYSQL_TYPE_LONGLONG));
1282
  field_list.push_back(new Item_empty_string("Relay_Log_File",
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1283
                                             FN_REFLEN));
1284
  field_list.push_back(new Item_return_int("Relay_Log_Pos", 10,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1285
                                           MYSQL_TYPE_LONGLONG));
1286
  field_list.push_back(new Item_empty_string("Relay_Master_Log_File",
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1287
                                             FN_REFLEN));
1288 1289
  field_list.push_back(new Item_empty_string("Slave_IO_Running", 3));
  field_list.push_back(new Item_empty_string("Slave_SQL_Running", 3));
1290 1291 1292 1293 1294 1295
  field_list.push_back(new Item_empty_string("Replicate_Do_DB", 20));
  field_list.push_back(new Item_empty_string("Replicate_Ignore_DB", 20));
  field_list.push_back(new Item_empty_string("Replicate_Do_Table", 20));
  field_list.push_back(new Item_empty_string("Replicate_Ignore_Table", 23));
  field_list.push_back(new Item_empty_string("Replicate_Wild_Do_Table", 24));
  field_list.push_back(new Item_empty_string("Replicate_Wild_Ignore_Table",
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1296
                                             28));
1297 1298 1299
  field_list.push_back(new Item_return_int("Last_Errno", 4, MYSQL_TYPE_LONG));
  field_list.push_back(new Item_empty_string("Last_Error", 20));
  field_list.push_back(new Item_return_int("Skip_Counter", 10,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1300
                                           MYSQL_TYPE_LONG));
1301
  field_list.push_back(new Item_return_int("Exec_Master_Log_Pos", 10,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1302
                                           MYSQL_TYPE_LONGLONG));
1303
  field_list.push_back(new Item_return_int("Relay_Log_Space", 10,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1304
                                           MYSQL_TYPE_LONGLONG));
1305
  field_list.push_back(new Item_empty_string("Until_Condition", 6));
1306
  field_list.push_back(new Item_empty_string("Until_Log_File", FN_REFLEN));
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1307
  field_list.push_back(new Item_return_int("Until_Log_Pos", 10,
1308
                                           MYSQL_TYPE_LONGLONG));
1309 1310 1311
  field_list.push_back(new Item_empty_string("Master_SSL_Allowed", 7));
  field_list.push_back(new Item_empty_string("Master_SSL_CA_File",
                                             sizeof(mi->ssl_ca)));
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1312
  field_list.push_back(new Item_empty_string("Master_SSL_CA_Path",
1313
                                             sizeof(mi->ssl_capath)));
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1314
  field_list.push_back(new Item_empty_string("Master_SSL_Cert",
1315
                                             sizeof(mi->ssl_cert)));
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1316
  field_list.push_back(new Item_empty_string("Master_SSL_Cipher",
1317
                                             sizeof(mi->ssl_cipher)));
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1318
  field_list.push_back(new Item_empty_string("Master_SSL_Key",
1319
                                             sizeof(mi->ssl_key)));
1320
  field_list.push_back(new Item_return_int("Seconds_Behind_Master", 10,
guilhem@gbichot2's avatar
guilhem@gbichot2 committed
1321
                                           MYSQL_TYPE_LONGLONG));
1322 1323
  field_list.push_back(new Item_empty_string("Master_SSL_Verify_Server_Cert",
                                             3));
1324 1325 1326 1327
  field_list.push_back(new Item_return_int("Last_IO_Errno", 4, MYSQL_TYPE_LONG));
  field_list.push_back(new Item_empty_string("Last_IO_Error", 20));
  field_list.push_back(new Item_return_int("Last_SQL_Errno", 4, MYSQL_TYPE_LONG));
  field_list.push_back(new Item_empty_string("Last_SQL_Error", 20));
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1328

1329 1330
  if (protocol->send_fields(&field_list,
                            Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
1331
    DBUG_RETURN(TRUE);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1332

1333 1334
  if (mi->host[0])
  {
1335
    DBUG_PRINT("info",("host is set: '%s'", mi->host));
1336
    String *packet= &thd->packet;
1337
    protocol->prepare_for_resend();
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1338

1339 1340 1341 1342 1343
    /*
      TODO: we read slave_running without run_lock, whereas these variables
      are updated under run_lock and not data_lock. In 5.0 we should lock
      run_lock on top of data_lock (with good order).
    */
1344 1345
    pthread_mutex_lock(&mi->data_lock);
    pthread_mutex_lock(&mi->rli.data_lock);
guilhem@gbichot2's avatar
guilhem@gbichot2 committed
1346 1347

    protocol->store(mi->io_thd ? mi->io_thd->proc_info : "", &my_charset_bin);
1348 1349
    protocol->store(mi->host, &my_charset_bin);
    protocol->store(mi->user, &my_charset_bin);
1350 1351
    protocol->store((uint32) mi->port);
    protocol->store((uint32) mi->connect_retry);
1352
    protocol->store(mi->master_log_name, &my_charset_bin);
1353
    protocol->store((ulonglong) mi->master_log_pos);
1354
    protocol->store(mi->rli.group_relay_log_name +
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1355 1356
                    dirname_length(mi->rli.group_relay_log_name),
                    &my_charset_bin);
1357 1358
    protocol->store((ulonglong) mi->rli.group_relay_log_pos);
    protocol->store(mi->rli.group_master_log_name, &my_charset_bin);
monty@mysql.com's avatar
monty@mysql.com committed
1359 1360
    protocol->store(mi->slave_running == MYSQL_SLAVE_RUN_CONNECT ?
                    "Yes" : "No", &my_charset_bin);
1361
    protocol->store(mi->rli.slave_running ? "Yes":"No", &my_charset_bin);
1362 1363 1364
    protocol->store(rpl_filter->get_do_db());
    protocol->store(rpl_filter->get_ignore_db());

1365 1366
    char buf[256];
    String tmp(buf, sizeof(buf), &my_charset_bin);
1367
    rpl_filter->get_do_table(&tmp);
1368
    protocol->store(&tmp);
1369
    rpl_filter->get_ignore_table(&tmp);
1370
    protocol->store(&tmp);
1371
    rpl_filter->get_wild_do_table(&tmp);
1372
    protocol->store(&tmp);
1373
    rpl_filter->get_wild_ignore_table(&tmp);
1374 1375
    protocol->store(&tmp);

1376 1377
    protocol->store(mi->rli.last_error().number);
    protocol->store(mi->rli.last_error().message, &my_charset_bin);
1378
    protocol->store((uint32) mi->rli.slave_skip_counter);
1379
    protocol->store((ulonglong) mi->rli.group_master_log_pos);
1380
    protocol->store((ulonglong) mi->rli.log_space_total);
1381 1382

    protocol->store(
1383 1384
      mi->rli.until_condition==Relay_log_info::UNTIL_NONE ? "None":
        ( mi->rli.until_condition==Relay_log_info::UNTIL_MASTER_POS? "Master":
1385 1386 1387
          "Relay"), &my_charset_bin);
    protocol->store(mi->rli.until_log_name, &my_charset_bin);
    protocol->store((ulonglong) mi->rli.until_log_pos);
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1388 1389

#ifdef HAVE_OPENSSL
1390 1391 1392 1393 1394 1395 1396 1397 1398
    protocol->store(mi->ssl? "Yes":"No", &my_charset_bin);
#else
    protocol->store(mi->ssl? "Ignored":"No", &my_charset_bin);
#endif
    protocol->store(mi->ssl_ca, &my_charset_bin);
    protocol->store(mi->ssl_capath, &my_charset_bin);
    protocol->store(mi->ssl_cert, &my_charset_bin);
    protocol->store(mi->ssl_cipher, &my_charset_bin);
    protocol->store(mi->ssl_key, &my_charset_bin);
guilhem@gbichot2's avatar
guilhem@gbichot2 committed
1399

1400 1401 1402 1403 1404 1405
    /*
      Seconds_Behind_Master: if SQL thread is running and I/O thread is
      connected, we can compute it otherwise show NULL (i.e. unknown).
    */
    if ((mi->slave_running == MYSQL_SLAVE_RUN_CONNECT) &&
        mi->rli.slave_running)
1406
    {
1407
      long time_diff= ((long)(time(0) - mi->rli.last_master_timestamp)
1408
                       - mi->clock_diff_with_master);
1409
      /*
1410 1411
        Apparently on some systems time_diff can be <0. Here are possible
        reasons related to MySQL:
1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423
        - the master is itself a slave of another master whose time is ahead.
        - somebody used an explicit SET TIMESTAMP on the master.
        Possible reason related to granularity-to-second of time functions
        (nothing to do with MySQL), which can explain a value of -1:
        assume the master's and slave's time are perfectly synchronized, and
        that at slave's connection time, when the master's timestamp is read,
        it is at the very end of second 1, and (a very short time later) when
        the slave's timestamp is read it is at the very beginning of second
        2. Then the recorded value for master is 1 and the recorded value for
        slave is 2. At SHOW SLAVE STATUS time, assume that the difference
        between timestamp of slave and rli->last_master_timestamp is 0
        (i.e. they are in the same second), then we get 0-(2-1)=-1 as a result.
1424 1425 1426 1427
        This confuses users, so we don't go below 0: hence the max().

        last_master_timestamp == 0 (an "impossible" timestamp 1970) is a
        special marker to say "consider we have caught up".
1428
      */
1429 1430
      protocol->store((longlong)(mi->rli.last_master_timestamp ?
                                 max(0, time_diff) : 0));
1431
    }
guilhem@gbichot2's avatar
guilhem@gbichot2 committed
1432
    else
1433
    {
guilhem@gbichot2's avatar
guilhem@gbichot2 committed
1434
      protocol->store_null();
1435 1436
    }
    protocol->store(mi->ssl_verify_server_cert? "Yes":"No", &my_charset_bin);
guilhem@gbichot2's avatar
guilhem@gbichot2 committed
1437

1438
    // Last_IO_Errno
1439
    protocol->store(mi->last_error().number);
1440
    // Last_IO_Error
1441
    protocol->store(mi->last_error().message, &my_charset_bin);
1442
    // Last_SQL_Errno
1443
    protocol->store(mi->rli.last_error().number);
1444
    // Last_SQL_Error
1445
    protocol->store(mi->rli.last_error().message, &my_charset_bin);
1446

1447 1448
    pthread_mutex_unlock(&mi->rli.data_lock);
    pthread_mutex_unlock(&mi->data_lock);
1449

1450
    if (my_net_write(&thd->net, (uchar*) thd->packet.ptr(), packet->length()))
1451
      DBUG_RETURN(TRUE);
1452
  }
1453
  send_eof(thd);
1454
  DBUG_RETURN(FALSE);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1455 1456
}

1457

1458 1459
void set_slave_thread_options(THD* thd)
{
1460
  DBUG_ENTER("set_slave_thread_options");
1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475
  /*
     It's nonsense to constrain the slave threads with max_join_size; if a
     query succeeded on master, we HAVE to execute it. So set
     OPTION_BIG_SELECTS. Setting max_join_size to HA_POS_ERROR is not enough
     (and it's not needed if we have OPTION_BIG_SELECTS) because an INSERT
     SELECT examining more than 4 billion rows would still fail (yes, because
     when max_join_size is 4G, OPTION_BIG_SELECTS is automatically set, but
     only for client threads.
  */
  ulonglong options= thd->options | OPTION_BIG_SELECTS;
  if (opt_log_slave_updates)
    options|= OPTION_BIN_LOG;
  else
    options&= ~OPTION_BIN_LOG;
  thd->options= options;
1476
  thd->variables.completion_type= 0;
1477
  DBUG_VOID_RETURN;
1478
}
1479

1480
void set_slave_thread_default_charset(THD* thd, Relay_log_info const *rli)
1481
{
1482 1483
  DBUG_ENTER("set_slave_thread_default_charset");

1484 1485 1486 1487 1488 1489 1490
  thd->variables.character_set_client=
    global_system_variables.character_set_client;
  thd->variables.collation_connection=
    global_system_variables.collation_connection;
  thd->variables.collation_server=
    global_system_variables.collation_server;
  thd->update_charset();
1491 1492 1493 1494 1495 1496 1497

  /*
    We use a const cast here since the conceptual (and externally
    visible) behavior of the function is to set the default charset of
    the thread.  That the cache has to be invalidated is a secondary
    effect.
   */
1498
  const_cast<Relay_log_info*>(rli)->cached_charset_invalidate();
1499
  DBUG_VOID_RETURN;
1500 1501
}

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
1502
/*
1503
  init_slave_thread()
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
1504
*/
1505

1506
static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1507 1508
{
  DBUG_ENTER("init_slave_thread");
1509
  thd->system_thread = (thd_type == SLAVE_THD_SQL) ?
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1510
    SYSTEM_THREAD_SLAVE_SQL : SYSTEM_THREAD_SLAVE_IO;
1511
  thd->security_ctx->skip_grants();
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1512
  my_net_init(&thd->net, 0);
1513 1514 1515 1516 1517
/*
  Adding MAX_LOG_EVENT_HEADER_LEN to the max_allowed_packet on all
  slave threads, since a replication event can become this much larger
  than the corresponding packet (query) sent from client to master.
*/
1518
  thd->variables.max_allowed_packet= global_system_variables.max_allowed_packet
1519
    + MAX_LOG_EVENT_HEADER;  /* note, incr over the global not session var */
1520
  thd->slave_thread = 1;
1521
  thd->enable_slow_log= opt_log_slow_slave_statements;
1522
  set_slave_thread_options(thd);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1523 1524
  thd->client_capabilities = CLIENT_LOCAL_FILES;
  pthread_mutex_lock(&LOCK_thread_count);
1525
  thd->thread_id= thd->variables.pseudo_thread_id= thread_id++;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1526 1527
  pthread_mutex_unlock(&LOCK_thread_count);

1528
  if (init_thr_lock() || thd->store_globals())
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1529
  {
1530 1531
    thd->cleanup();
    delete thd;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1532 1533
    DBUG_RETURN(-1);
  }
1534
  lex_start(thd);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1535

1536
  if (thd_type == SLAVE_THD_SQL)
1537
    thd_proc_info(thd, "Waiting for the next event in relay log");
1538
  else
1539
    thd_proc_info(thd, "Waiting for master update");
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1540 1541 1542 1543 1544
  thd->version=refresh_version;
  thd->set_time();
  DBUG_RETURN(0);
}

1545

1546
static int safe_sleep(THD* thd, int sec, CHECK_KILLED_FUNC thread_killed,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1547
                      void* thread_killed_arg)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1548
{
1549
  int nap_time;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1550
  thr_alarm_t alarmed;
1551 1552
  DBUG_ENTER("safe_sleep");

bk@work.mysql.com's avatar
bk@work.mysql.com committed
1553
  thr_alarm_init(&alarmed);
1554
  time_t start_time= my_time(0);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1555 1556
  time_t end_time= start_time+sec;

1557
  while ((nap_time= (int) (end_time - start_time)) > 0)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1558
  {
1559
    ALARM alarm_buff;
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1560
    /*
1561
      The only reason we are asking for alarm is so that
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1562 1563 1564
      we will be woken up in case of murder, so if we do not get killed,
      set the alarm so it goes off after we wake up naturally
    */
1565
    thr_alarm(&alarmed, 2 * nap_time, &alarm_buff);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1566
    sleep(nap_time);
1567
    thr_end_alarm(&alarmed);
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1568

1569
    if ((*thread_killed)(thd,thread_killed_arg))
1570
      DBUG_RETURN(1);
1571
    start_time= my_time(0);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1572
  }
1573
  DBUG_RETURN(0);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1574 1575
}

1576

1577
static int request_dump(MYSQL* mysql, Master_info* mi,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1578
                        bool *suppress_warnings)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1579
{
1580
  uchar buf[FN_REFLEN + 10];
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1581 1582
  int len;
  int binlog_flags = 0; // for now
1583
  char* logname = mi->master_log_name;
1584
  DBUG_ENTER("request_dump");
1585 1586
  
  *suppress_warnings= FALSE;
1587

monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
1588
  // TODO if big log files: Change next to int8store()
monty@mysql.com's avatar
monty@mysql.com committed
1589
  int4store(buf, (ulong) mi->master_log_pos);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1590
  int2store(buf + 4, binlog_flags);
1591
  int4store(buf + 6, server_id);
1592
  len = (uint) strlen(logname);
1593
  memcpy(buf + 10, logname,len);
hf@deer.mysql.r18.ru's avatar
SCRUM  
hf@deer.mysql.r18.ru committed
1594
  if (simple_command(mysql, COM_BINLOG_DUMP, buf, len + 10, 1))
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1595
  {
1596 1597 1598 1599 1600
    /*
      Something went wrong, so we will just reconnect and retry later
      in the future, we should do a better error analysis, but for
      now we just fill up the error log :-)
    */
hf@deer.mysql.r18.ru's avatar
SCRUM  
hf@deer.mysql.r18.ru committed
1601
    if (mysql_errno(mysql) == ER_NET_READ_INTERRUPTED)
1602
      *suppress_warnings= TRUE;                 // Suppress reconnect warning
1603
    else
1604
      sql_print_error("Error on COM_BINLOG_DUMP: %d  %s, will retry in %d secs",
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1605 1606
                      mysql_errno(mysql), mysql_error(mysql),
                      master_connect_retry);
1607
    DBUG_RETURN(1);
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1608
  }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1609

1610
  DBUG_RETURN(0);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1611 1612
}

1613

1614
static int request_table_dump(MYSQL* mysql, const char* db, const char* table)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1615
{
1616
  uchar buf[1024], *p = buf;
1617 1618
  DBUG_ENTER("request_table_dump");

1619 1620
  uint table_len = (uint) strlen(table);
  uint db_len = (uint) strlen(db);
1621
  if (table_len + db_len > sizeof(buf) - 2)
1622 1623
  {
    sql_print_error("request_table_dump: Buffer overrun");
1624
    DBUG_RETURN(1);
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1625 1626
  }

bk@work.mysql.com's avatar
bk@work.mysql.com committed
1627 1628 1629 1630 1631
  *p++ = db_len;
  memcpy(p, db, db_len);
  p += db_len;
  *p++ = table_len;
  memcpy(p, table, table_len);
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1632

hf@deer.mysql.r18.ru's avatar
SCRUM  
hf@deer.mysql.r18.ru committed
1633
  if (simple_command(mysql, COM_TABLE_DUMP, buf, p - buf + table_len, 1))
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1634 1635
  {
    sql_print_error("request_table_dump: Error sending the table dump \
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1636
command");
1637
    DBUG_RETURN(1);
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1638
  }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1639

1640
  DBUG_RETURN(0);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1641 1642
}

1643

1644
/*
1645
  Read one event from the master
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1646

1647 1648
  SYNOPSIS
    read_event()
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1649 1650 1651 1652 1653 1654
    mysql               MySQL connection
    mi                  Master connection information
    suppress_warnings   TRUE when a normal net read timeout has caused us to
                        try a reconnect.  We do not want to print anything to
                        the error log in this case because this a anormal
                        event in an idle server.
1655

1656
    RETURN VALUES
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1657 1658
    'packet_error'      Error
    number              Length of packet
1659 1660
*/

1661
static ulong read_event(MYSQL* mysql, Master_info *mi, bool* suppress_warnings)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1662
{
1663
  ulong len;
1664
  DBUG_ENTER("read_event");
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1665

1666
  *suppress_warnings= FALSE;
1667 1668 1669 1670
  /*
    my_real_read() will time us out
    We check if we were told to die, and if not, try reading again
  */
1671
#ifndef DBUG_OFF
1672
  if (disconnect_slave_event_count && !(mi->events_till_disconnect--))
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1673
    DBUG_RETURN(packet_error);
1674
#endif
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1675

1676
  len = cli_safe_read(mysql);
1677
  if (len == packet_error || (long) len < 1)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1678
  {
hf@deer.mysql.r18.ru's avatar
SCRUM  
hf@deer.mysql.r18.ru committed
1679
    if (mysql_errno(mysql) == ER_NET_READ_INTERRUPTED)
1680 1681
    {
      /*
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1682 1683 1684
        We are trying a normal reconnect after a read timeout;
        we suppress prints to .err file as long as the reconnect
        happens without problems
1685 1686 1687 1688
      */
      *suppress_warnings= TRUE;
    }
    else
1689
      sql_print_error("Error reading packet from server: %s ( server_errno=%d)",
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1690
                      mysql_error(mysql), mysql_errno(mysql));
1691
    DBUG_RETURN(packet_error);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1692 1693
  }

1694 1695
  /* Check if eof packet */
  if (len < 8 && mysql->net.read_pos[0] == 254)
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1696
  {
1697 1698
    sql_print_information("Slave: received end packet from server, apparent "
                          "master shutdown: %s",
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1699
                     mysql_error(mysql));
1700
     DBUG_RETURN(packet_error);
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1701
  }
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1702

1703
  DBUG_PRINT("exit", ("len: %lu  net->read_pos[4]: %d",
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1704 1705
                      len, mysql->net.read_pos[4]));
  DBUG_RETURN(len - 1);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1706 1707
}

1708

1709
int check_expected_error(THD* thd, Relay_log_info const *rli,
1710
                         int expected_error)
1711
{
1712 1713
  DBUG_ENTER("check_expected_error");

1714 1715
  switch (expected_error) {
  case ER_NET_READ_ERROR:
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1716
  case ER_NET_ERROR_ON_WRITE:
1717
  case ER_QUERY_INTERRUPTED:
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
1718
  case ER_SERVER_SHUTDOWN:
1719
  case ER_NEW_ABORTING_CONNECTION:
1720
    DBUG_RETURN(1);
1721
  default:
1722
    DBUG_RETURN(0);
1723
  }
1724
}
1725

1726

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
1727 1728 1729 1730 1731 1732 1733 1734 1735
/*
  Check if the current error is of temporary nature of not.
  Some errors are temporary in nature, such as
  ER_LOCK_DEADLOCK and ER_LOCK_WAIT_TIMEOUT.  Ndb also signals
  that the error is temporary by pushing a warning with the error code
  ER_GET_TEMPORARY_ERRMSG, if the originating error is temporary.
*/
static int has_temporary_error(THD *thd)
{
1736 1737
  DBUG_ENTER("has_temporary_error");

tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
1738
  if (thd->is_fatal_error)
1739
    DBUG_RETURN(0);
1740 1741

  DBUG_EXECUTE_IF("all_errors_are_temporary_errors",
1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754
                  if (thd->main_da.is_error())
                  {
                    thd->clear_error();
                    my_error(ER_LOCK_DEADLOCK, MYF(0));
                  });

  /*
    If there is no message in THD, we can't say if it's a temporary
    error or not. This is currently the case for Incident_log_event,
    which sets no message. Return FALSE.
  */
  if (!thd->is_error())
    DBUG_RETURN(0);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
1755 1756 1757 1758 1759 1760

  /*
    Temporary error codes:
    currently, InnoDB deadlock detected by InnoDB or lock
    wait timeout (innodb_lock_wait_timeout exceeded
  */
1761 1762
  if (thd->main_da.sql_errno() == ER_LOCK_DEADLOCK ||
      thd->main_da.sql_errno() == ER_LOCK_WAIT_TIMEOUT)
1763
    DBUG_RETURN(1);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
1764 1765 1766 1767 1768 1769 1770 1771 1772

#ifdef HAVE_NDB_BINLOG
  /*
    currently temporary error set in ndbcluster
  */
  List_iterator_fast<MYSQL_ERROR> it(thd->warn_list);
  MYSQL_ERROR *err;
  while ((err= it++))
  {
serg@serg.mylan's avatar
serg@serg.mylan committed
1773
    DBUG_PRINT("info", ("has warning %d %s", err->code, err->msg));
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
1774 1775 1776
    switch (err->code)
    {
    case ER_GET_TEMPORARY_ERRMSG:
1777
      DBUG_RETURN(1);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
1778 1779 1780 1781 1782
    default:
      break;
    }
  }
#endif
1783
  DBUG_RETURN(0);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
1784
}
1785

1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894

/**
  Applies the given event and advances the relay log position.

  In essence, this function does:

  @code
    ev->apply_event(rli);
    ev->update_pos(rli);
  @endcode

  But it also does some maintainance, such as skipping events if
  needed and reporting errors.

  If the @c skip flag is set, then it is tested whether the event
  should be skipped, by looking at the slave_skip_counter and the
  server id.  The skip flag should be set when calling this from a
  replication thread but not set when executing an explicit BINLOG
  statement.

  @retval 0 OK.

  @retval 1 Error calling ev->apply_event().

  @retval 2 No error calling ev->apply_event(), but error calling
  ev->update_pos().
*/
int apply_event_and_update_pos(Log_event* ev, THD* thd, Relay_log_info* rli,
                               bool skip)
{
  int exec_res= 0;

  DBUG_ENTER("apply_event_and_update_pos");

  DBUG_PRINT("exec_event",("%s(type_code: %d; server_id: %d)",
                           ev->get_type_str(), ev->get_type_code(),
                           ev->server_id));
  DBUG_PRINT("info", ("thd->options: %s%s; rli->last_event_start_time: %lu",
                      FLAGSTR(thd->options, OPTION_NOT_AUTOCOMMIT),
                      FLAGSTR(thd->options, OPTION_BEGIN),
                      rli->last_event_start_time));

  /*
    Execute the event to change the database and update the binary
    log coordinates, but first we set some data that is needed for
    the thread.

    The event will be executed unless it is supposed to be skipped.

    Queries originating from this server must be skipped.  Low-level
    events (Format_description_log_event, Rotate_log_event,
    Stop_log_event) from this server must also be skipped. But for
    those we don't want to modify 'group_master_log_pos', because
    these events did not exist on the master.
    Format_description_log_event is not completely skipped.

    Skip queries specified by the user in 'slave_skip_counter'.  We
    can't however skip events that has something to do with the log
    files themselves.

    Filtering on own server id is extremely important, to ignore
    execution of events created by the creation/rotation of the relay
    log (remember that now the relay log starts with its Format_desc,
    has a Rotate etc).
  */

  thd->server_id = ev->server_id; // use the original server id for logging
  thd->set_time();                            // time the query
  thd->lex->current_select= 0;
  if (!ev->when)
    ev->when= my_time(0);
  ev->thd = thd; // because up to this point, ev->thd == 0

  if (skip)
  {
    int reason= ev->shall_skip(rli);
    if (reason == Log_event::EVENT_SKIP_COUNT)
      --rli->slave_skip_counter;
    pthread_mutex_unlock(&rli->data_lock);
    if (reason == Log_event::EVENT_SKIP_NOT)
      exec_res= ev->apply_event(rli);
#ifndef DBUG_OFF
    /*
      This only prints information to the debug trace.

      TODO: Print an informational message to the error log?
    */
    static const char *const explain[] = {
      // EVENT_SKIP_NOT,
      "not skipped",
      // EVENT_SKIP_IGNORE,
      "skipped because event should be ignored",
      // EVENT_SKIP_COUNT
      "skipped because event skip counter was non-zero"
    };
    DBUG_PRINT("info", ("OPTION_BEGIN: %d; IN_STMT: %d",
                        thd->options & OPTION_BEGIN ? 1 : 0,
                        rli->get_flag(Relay_log_info::IN_STMT)));
    DBUG_PRINT("skip_event", ("%s event was %s",
                              ev->get_type_str(), explain[reason]));
#endif
  }
  else
    exec_res= ev->apply_event(rli);

  DBUG_PRINT("info", ("apply_event error = %d", exec_res));
  if (exec_res == 0)
  {
    int error= ev->update_pos(rli);
1895 1896 1897 1898
#ifdef HAVE_purify
    if (!rli->is_fake)
#endif
    {
1899
#ifndef DBUG_OFF
1900
      char buf[22];
1901
#endif
1902 1903 1904 1905 1906 1907 1908 1909
      DBUG_PRINT("info", ("update_pos error = %d", error));
      DBUG_PRINT("info", ("group %s %s",
                          llstr(rli->group_relay_log_pos, buf),
                          rli->group_relay_log_name));
      DBUG_PRINT("info", ("event %s %s",
                          llstr(rli->event_relay_log_pos, buf),
                          rli->event_relay_log_name));
    }
1910 1911 1912 1913 1914 1915 1916 1917 1918
    /*
      The update should not fail, so print an error message and
      return an error code.

      TODO: Replace this with a decent error message when merged
      with BUG#24954 (which adds several new error message).
    */
    if (error)
    {
1919
      char buf[22];
1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962
      rli->report(ERROR_LEVEL, ER_UNKNOWN_ERROR,
                  "It was not possible to update the positions"
                  " of the relay log information: the slave may"
                  " be in an inconsistent state."
                  " Stopped in %s position %s",
                  rli->group_relay_log_name,
                  llstr(rli->group_relay_log_pos, buf));
      DBUG_RETURN(2);
    }
  }

  DBUG_RETURN(exec_res ? 1 : 0);
}


/**
  Top-level function for executing the next event from the relay log.

  This function reads the event from the relay log, executes it, and
  advances the relay log position.  It also handles errors, etc.

  This function may fail to apply the event for the following reasons:

   - The position specfied by the UNTIL condition of the START SLAVE
     command is reached.

   - It was not possible to read the event from the log.

   - The slave is killed.

   - An error occurred when applying the event, and the event has been
     tried slave_trans_retries times.  If the event has been retried
     fewer times, 0 is returned.

   - init_master_info or init_relay_log_pos failed. (These are called
     if a failure occurs when applying the event.)</li>

   - An error occurred when updating the binlog position.

  @retval 0 The event was applied.

  @retval 1 The event was not applied.
*/
1963
static int exec_relay_log_event(THD* thd, Relay_log_info* rli)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
1964
{
1965 1966
  DBUG_ENTER("exec_relay_log_event");

1967 1968
  /*
     We acquire this mutex since we need it for all operations except
1969
     event execution. But we will release it in places where we will
1970 1971 1972
     wait for something for example inside of next_event().
   */
  pthread_mutex_lock(&rli->data_lock);
1973 1974 1975 1976 1977 1978 1979 1980
  /*
    This tests if the position of the end of the last previous executed event
    hits the UNTIL barrier.
    We would prefer to test if the position of the start (or possibly) end of
    the to-be-read event hits the UNTIL barrier, this is different if there
    was an event ignored by the I/O thread just before (BUG#13861 to be
    fixed).
  */
1981
  if (rli->until_condition!=Relay_log_info::UNTIL_NONE &&
1982
      rli->is_until_satisfied())
1983
  {
1984
    char buf[22];
1985
    sql_print_information("Slave SQL thread stopped because it reached its"
1986
                    " UNTIL position %s", llstr(rli->until_pos(), buf));
1987
    /*
1988 1989 1990 1991 1992
      Setting abort_slave flag because we do not want additional message about
      error in query execution to be printed.
    */
    rli->abort_slave= 1;
    pthread_mutex_unlock(&rli->data_lock);
1993
    DBUG_RETURN(1);
1994
  }
1995

1996
  Log_event * ev = next_event(rli);
1997

1998
  DBUG_ASSERT(rli->sql_thd==thd);
1999

2000
  if (sql_slave_killed(thd,rli))
2001
  {
2002
    pthread_mutex_unlock(&rli->data_lock);
2003
    delete ev;
2004
    DBUG_RETURN(1);
2005
  }
2006 2007
  if (ev)
  {
2008
    int exec_res= apply_event_and_update_pos(ev, thd, rli, TRUE);
2009

2010
    /*
2011 2012 2013
      Format_description_log_event should not be deleted because it will be
      used to read info about the relay log's format; it will be deleted when
      the SQL thread does not need it, i.e. when this thread terminates.
2014
    */
2015
    if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
2016
    {
2017 2018
      DBUG_PRINT("info", ("Deleting the event after it has been executed"));
      delete ev;
2019 2020
    }

2021
    /*
2022 2023
      update_log_pos failed: this should not happen, so we don't
      retry.
2024
    */
2025 2026 2027
    if (exec_res == 2)
      DBUG_RETURN(1);

2028 2029
    if (slave_trans_retries)
    {
2030 2031
      int temp_err;
      if (exec_res && (temp_err= has_temporary_error(thd)))
2032 2033 2034 2035
      {
        const char *errmsg;
        /*
          We were in a transaction which has been rolled back because of a
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
2036 2037
          temporary error;
          let's seek back to BEGIN log event and retry it all again.
2038
	  Note, if lock wait timeout (innodb_lock_wait_timeout exceeded)
2039
	  there is no rollback since 5.0.13 (ref: manual).
2040 2041 2042 2043 2044 2045 2046 2047
          We have to not only seek but also
          a) init_master_info(), to seek back to hot relay log's start for later
          (for when we will come back to this hot log after re-processing the
          possibly existing old logs where BEGIN is: check_binlog_magic() will
          then need the cache to be at position 0 (see comments at beginning of
          init_master_info()).
          b) init_relay_log_pos(), because the BEGIN may be an older relay log.
        */
2048
        if (rli->trans_retries < slave_trans_retries)
2049 2050 2051 2052 2053 2054
        {
          if (init_master_info(rli->mi, 0, 0, 0, SLAVE_SQL))
            sql_print_error("Failed to initialize the master info structure");
          else if (init_relay_log_pos(rli,
                                      rli->group_relay_log_name,
                                      rli->group_relay_log_pos,
2055
                                      1, &errmsg, 1))
2056 2057 2058 2059 2060
            sql_print_error("Error initializing relay log position: %s",
                            errmsg);
          else
          {
            exec_res= 0;
2061
            end_trans(thd, ROLLBACK);
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2062
            /* chance for concurrent connection to get more locks */
2063
            safe_sleep(thd, min(rli->trans_retries, MAX_SLAVE_RETRY_PAUSE),
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2064
                       (CHECK_KILLED_FUNC)sql_slave_killed, (void*)rli);
2065
            pthread_mutex_lock(&rli->data_lock); // because of SHOW STATUS
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2066
            rli->trans_retries++;
2067 2068 2069 2070
            rli->retried_trans++;
            pthread_mutex_unlock(&rli->data_lock);
            DBUG_PRINT("info", ("Slave retries transaction "
                                "rli->trans_retries: %lu", rli->trans_retries));
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2071
          }
2072 2073 2074 2075 2076 2077 2078
        }
        else
          sql_print_error("Slave SQL thread retried transaction %lu time(s) "
                          "in vain, giving up. Consider raising the value of "
                          "the slave_transaction_retries variable.",
                          slave_trans_retries);
      }
2079 2080 2081
      else if (exec_res && !temp_err ||
               (opt_using_transactions &&
                rli->group_relay_log_pos == rli->event_relay_log_pos))
2082 2083
      {
        /*
2084 2085 2086
          Only reset the retry counter if the entire group succeeded
          or failed with a non-transient error.  On a successful
          event, the execution will proceed as usual; in the case of a
2087 2088 2089
          non-transient error, the slave will stop with an error.
         */
        rli->trans_retries= 0; // restart from fresh
2090
        DBUG_PRINT("info", ("Resetting retry counter, rli->trans_retries: %lu",
2091
                            rli->trans_retries));
2092
      }
2093 2094
    }
    DBUG_RETURN(exec_res);
2095
  }
2096
  pthread_mutex_unlock(&rli->data_lock);
2097 2098
  rli->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_READ_FAILURE,
              ER(ER_SLAVE_RELAY_LOG_READ_FAILURE), "\
2099 2100 2101 2102 2103 2104 2105
Could not parse relay log event entry. The possible reasons are: the master's \
binary log is corrupted (you can check this by running 'mysqlbinlog' on the \
binary log), the slave's relay log is corrupted (you can check this by running \
'mysqlbinlog' on the relay log), a network problem, or a bug in the master's \
or slave's MySQL code. If you want to check the master's binary log or slave's \
relay log, you will be able to know their names by issuing 'SHOW SLAVE STATUS' \
on this slave.\
jcole@tetra.spaceapes.com's avatar
jcole@tetra.spaceapes.com committed
2106
");
2107
  DBUG_RETURN(1);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2108 2109
}

2110

2111
static bool check_io_slave_killed(THD *thd, Master_info *mi, const char *info)
2112 2113 2114
{
  if (io_slave_killed(thd, mi))
  {
2115
    if (info && global_system_variables.log_warnings)
2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148
      sql_print_information(info);
    return TRUE;
  }
  return FALSE;
}


/**
  @brief Try to reconnect slave IO thread.

  @details Terminates current connection to master, sleeps for
  @c mi->connect_retry msecs and initiates new connection with
  @c safe_reconnect(). Variable pointed by @c retry_count is increased -
  if it exceeds @c master_retry_count then connection is not re-established
  and function signals error.
  Unless @c suppres_warnings is TRUE, a warning is put in the server error log
  when reconnecting. The warning message and messages used to report errors
  are taken from @c messages array. In case @c master_retry_count is exceeded,
  no messages are added to the log.

  @param[in]     thd                 Thread context.
  @param[in]     mysql               MySQL connection.
  @param[in]     mi                  Master connection information.
  @param[in,out] retry_count         Number of attempts to reconnect.
  @param[in]     suppress_warnings   TRUE when a normal net read timeout 
                                     has caused to reconnecting.
  @param[in]     messages            Messages to print/log, see 
                                     reconnect_messages[] array.

  @retval        0                   OK.
  @retval        1                   There was an error.
*/

2149
static int try_to_reconnect(THD *thd, MYSQL *mysql, Master_info *mi,
2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198
                            uint *retry_count, bool suppress_warnings,
                            const char *messages[SLAVE_RECON_MSG_MAX])
{
  mi->slave_running= MYSQL_SLAVE_RUN_NOT_CONNECT;
  thd->proc_info= messages[SLAVE_RECON_MSG_WAIT];
#ifdef SIGNAL_WITH_VIO_CLOSE  
  thd->clear_active_vio();
#endif
  end_server(mysql);
  if ((*retry_count)++)
  {
    if (*retry_count > master_retry_count)
      return 1;                             // Don't retry forever
    safe_sleep(thd, mi->connect_retry, (CHECK_KILLED_FUNC) io_slave_killed,
               (void *) mi);
  }
  if (check_io_slave_killed(thd, mi, messages[SLAVE_RECON_MSG_KILLED_WAITING]))
    return 1;
  thd->proc_info = messages[SLAVE_RECON_MSG_AFTER];
  if (!suppress_warnings) 
  {
    char buf[256], llbuff[22];
    my_snprintf(buf, sizeof(buf), messages[SLAVE_RECON_MSG_FAILED], 
                IO_RPL_LOG_NAME, llstr(mi->master_log_pos, llbuff));
    /* 
      Raise a warining during registering on master/requesting dump.
      Log a message reading event.
    */
    if (messages[SLAVE_RECON_MSG_COMMAND][0])
    {
      mi->report(WARNING_LEVEL, ER_SLAVE_MASTER_COM_FAILURE,
                 ER(ER_SLAVE_MASTER_COM_FAILURE), 
                 messages[SLAVE_RECON_MSG_COMMAND], buf);
    }
    else
    {
      sql_print_information(buf);
    }
  }
  if (safe_reconnect(thd, mysql, mi, 1) || io_slave_killed(thd, mi))
  {
    if (global_system_variables.log_warnings)
      sql_print_information(messages[SLAVE_RECON_MSG_KILLED_AFTER]);
    return 1;
  }
  return 0;
}


monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
2199
/* Slave I/O Thread entry point */
2200

2201
pthread_handler_t handle_slave_io(void *arg)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2202
{
2203 2204
  THD *thd; // needs to be first for thread_stack
  MYSQL *mysql;
2205
  Master_info *mi = (Master_info*)arg;
2206
  Relay_log_info *rli= &mi->rli;
2207 2208
  char llbuff[22];
  uint retry_count;
2209 2210 2211 2212
  bool suppress_warnings;
#ifndef DBUG_OFF
  uint retry_count_reg= 0, retry_count_dump= 0, retry_count_event= 0;
#endif
2213 2214
  // needs to call my_thread_init(), otherwise we get a coredump in DBUG_ stuff
  my_thread_init();
2215
  DBUG_ENTER("handle_slave_io");
2216

2217
  DBUG_ASSERT(mi->inited);
2218 2219 2220
  mysql= NULL ;
  retry_count= 0;

2221
  pthread_mutex_lock(&mi->run_lock);
2222 2223 2224
  /* Inform waiting threads that slave has started */
  mi->slave_run_id++;

2225
#ifndef DBUG_OFF
2226
  mi->events_till_disconnect = disconnect_slave_event_count;
2227 2228
#endif

2229
  thd= new THD; // note that contructor of THD uses DBUG_ !
2230
  THD_CHECK_SENTRY(thd);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2231 2232

  pthread_detach_this_thread();
2233
  thd->thread_stack= (char*) &thd; // remember where our stack is
2234
  if (init_slave_thread(thd, SLAVE_THD_IO))
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2235 2236 2237 2238 2239 2240
  {
    pthread_cond_broadcast(&mi->start_cond);
    pthread_mutex_unlock(&mi->run_lock);
    sql_print_error("Failed during slave I/O thread initialization");
    goto err;
  }
2241
  mi->io_thd = thd;
2242
  pthread_mutex_lock(&LOCK_thread_count);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2243
  threads.append(thd);
2244
  pthread_mutex_unlock(&LOCK_thread_count);
2245 2246 2247
  mi->slave_running = 1;
  mi->abort_slave = 0;
  pthread_mutex_unlock(&mi->run_lock);
2248
  pthread_cond_broadcast(&mi->start_cond);
2249

2250
  DBUG_PRINT("master_info",("log_file_name: '%s'  position: %s",
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2251 2252
                            mi->master_log_name,
                            llstr(mi->master_log_pos,llbuff)));
2253

hf@deer.mysql.r18.ru's avatar
SCRUM  
hf@deer.mysql.r18.ru committed
2254
  if (!(mi->mysql = mysql = mysql_init(NULL)))
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
2255
  {
2256 2257
    mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
               ER(ER_SLAVE_FATAL_ERROR), "error in mysql_init()");
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
2258 2259
    goto err;
  }
2260

2261
  thd_proc_info(thd, "Connecting to master");
2262
  // we can get killed during safe_connect
2263
  if (!safe_connect(thd, mysql, mi))
2264
  {
2265 2266 2267
    sql_print_information("Slave I/O thread: connected to master '%s@%s:%d',"
                          "replication started in log '%s' at position %s",
                          mi->user, mi->host, mi->port,
2268 2269
			  IO_RPL_LOG_NAME,
			  llstr(mi->master_log_pos,llbuff));
2270 2271 2272 2273 2274
  /*
    Adding MAX_LOG_EVENT_HEADER_LEN to the max_packet_size on the I/O
    thread, since a replication event can become this much larger than
    the corresponding packet (query) sent from client to master.
  */
2275 2276
    mysql->net.max_packet_size= thd->net.max_packet_size+= MAX_LOG_EVENT_HEADER;
  }
2277
  else
2278
  {
2279
    sql_print_information("Slave I/O thread killed while connecting to master");
2280 2281
    goto err;
  }
2282

2283
connected:
2284

2285 2286
  // TODO: the assignment below should be under mutex (5.0)
  mi->slave_running= MYSQL_SLAVE_RUN_CONNECT;
2287
  thd->slave_net = &mysql->net;
2288
  thd_proc_info(thd, "Checking master version");
guilhem@gbichot2's avatar
guilhem@gbichot2 committed
2289
  if (get_master_version_and_clock(mysql, mi))
2290
    goto err;
2291 2292

  if (mi->rli.relay_log.description_event_for_queue->binlog_version > 1)
2293
  {
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2294 2295 2296
    /*
      Register ourselves with the master.
    */
2297
    thd_proc_info(thd, "Registering slave on master");
2298
    if (register_slave_on_master(mysql, mi, &suppress_warnings))
2299
    {
2300 2301 2302 2303 2304 2305 2306 2307 2308
      if (!check_io_slave_killed(thd, mi, "Slave I/O thread killed "
                                "while registering slave on master"))
      {
        sql_print_error("Slave I/O thread couldn't register on master");
        if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
                             reconnect_messages[SLAVE_RECON_ACT_REG]))
          goto err;
      }
      else
2309 2310
        goto err;
      goto connected;
2311
    }
2312 2313 2314 2315 2316 2317 2318 2319 2320 2321
    DBUG_EXECUTE_IF("FORCE_SLAVE_TO_RECONNECT_REG", 
      if (!retry_count_reg)
      {
        retry_count_reg++;
        sql_print_information("Forcing to reconnect slave I/O thread");
        if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
                             reconnect_messages[SLAVE_RECON_ACT_REG]))
          goto err;
        goto connected;
      });
2322
  }
2323

2324
  DBUG_PRINT("info",("Starting reading binary log from master"));
2325
  while (!io_slave_killed(thd,mi))
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
2326
  {
2327
    thd_proc_info(thd, "Requesting binlog dump");
2328
    if (request_dump(mysql, mi, &suppress_warnings))
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2329 2330
    {
      sql_print_error("Failed on request_dump()");
2331 2332 2333 2334
      if (check_io_slave_killed(thd, mi, "Slave I/O thread killed while \
requesting master dump") ||
          try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
                           reconnect_messages[SLAVE_RECON_ACT_DUMP]))
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2335
        goto err;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2336 2337
      goto connected;
    }
2338 2339 2340 2341 2342 2343 2344 2345 2346 2347
    DBUG_EXECUTE_IF("FORCE_SLAVE_TO_RECONNECT_DUMP", 
      if (!retry_count_dump)
      {
        retry_count_dump++;
        sql_print_information("Forcing to reconnect slave I/O thread");
        if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
                             reconnect_messages[SLAVE_RECON_ACT_DUMP]))
          goto err;
        goto connected;
      });
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2348

2349
    while (!io_slave_killed(thd,mi))
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2350
    {
2351
      ulong event_len;
2352
      /*
2353
         We say "waiting" because read_event() will wait if there's nothing to
2354 2355 2356
         read. But if there's something to read, it will not wait. The
         important thing is to not confuse users by saying "reading" whereas
         we're in fact receiving nothing.
2357
      */
2358
      thd_proc_info(thd, "Waiting for master to send event");
2359
      event_len= read_event(mysql, mi, &suppress_warnings);
2360 2361
      if (check_io_slave_killed(thd, mi, "Slave I/O thread killed while \
reading event"))
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2362
        goto err;
2363 2364 2365 2366 2367 2368 2369 2370 2371 2372
      DBUG_EXECUTE_IF("FORCE_SLAVE_TO_RECONNECT_EVENT",
        if (!retry_count_event)
        {
          retry_count_event++;
          sql_print_information("Forcing to reconnect slave I/O thread");
          if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
                               reconnect_messages[SLAVE_RECON_ACT_EVENT]))
            goto err;
          goto connected;
        });
2373

monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2374 2375
      if (event_len == packet_error)
      {
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2376
        uint mysql_error_number= mysql_errno(mysql);
2377 2378
        switch (mysql_error_number) {
        case CR_NET_PACKET_TOO_LARGE:
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2379
          sql_print_error("\
2380 2381 2382
Log entry on master is longer than max_allowed_packet (%ld) on \
slave. If the entry is correct, restart the server with a higher value of \
max_allowed_packet",
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2383 2384
                          thd->variables.max_allowed_packet);
          goto err;
2385
        case ER_MASTER_FATAL_ERROR_READING_BINLOG:
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2386 2387 2388
          sql_print_error(ER(mysql_error_number), mysql_error_number,
                          mysql_error(mysql));
          goto err;
2389 2390 2391 2392
        case EE_OUTOFMEMORY:
        case ER_OUTOFMEMORY:
          sql_print_error("\
Stopping slave I/O thread due to out-of-memory error from master");
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2393 2394
          goto err;
        }
2395 2396
        if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
                             reconnect_messages[SLAVE_RECON_ACT_EVENT]))
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2397 2398
          goto err;
        goto connected;
2399
      } // if (event_len == packet_error)
2400

grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2401
      retry_count=0;                    // ok event, reset retry counter
2402
      thd_proc_info(thd, "Queueing master event to the relay log");
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2403
      if (queue_event(mi,(const char*)mysql->net.read_pos + 1,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2404
                      event_len))
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2405
      {
2406 2407 2408
        mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE,
                   ER(ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
                   "could not queue event from master");
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2409
        goto err;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2410
      }
2411 2412 2413 2414 2415
      if (flush_master_info(mi, 1))
      {
        sql_print_error("Failed to flush master info file");
        goto err;
      }
2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427
      /*
        See if the relay logs take too much space.
        We don't lock mi->rli.log_space_lock here; this dirty read saves time
        and does not introduce any problem:
        - if mi->rli.ignore_log_space_limit is 1 but becomes 0 just after (so
        the clean value is 0), then we are reading only one more event as we
        should, and we'll block only at the next event. No big deal.
        - if mi->rli.ignore_log_space_limit is 0 but becomes 1 just after (so
        the clean value is 1), then we are going into wait_for_relay_log_space()
        for no reason, but this function will do a clean read, notice the clean
        value and exit immediately.
      */
2428 2429 2430 2431 2432
#ifndef DBUG_OFF
      {
        char llbuf1[22], llbuf2[22];
        DBUG_PRINT("info", ("log_space_limit=%s log_space_total=%s \
ignore_log_space_limit=%d",
2433 2434
                            llstr(rli->log_space_limit,llbuf1),
                            llstr(rli->log_space_total,llbuf2),
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2435
                            (int) rli->ignore_log_space_limit));
2436 2437 2438
      }
#endif

2439
      if (rli->log_space_limit && rli->log_space_limit <
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2440
          rli->log_space_total &&
2441
          !rli->ignore_log_space_limit)
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2442 2443 2444
        if (wait_for_relay_log_space(rli))
        {
          sql_print_error("Slave I/O thread aborted while waiting for relay \
2445
log space");
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2446 2447 2448
          goto err;
        }
    }
2449
  }
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2450

monty@donna.mysql.fi's avatar
monty@donna.mysql.fi committed
2451
  // error = 0;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2452
err:
2453
  // print the current replication position
2454
  sql_print_information("Slave I/O thread exiting, read up to log '%s', position %s",
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2455
                  IO_RPL_LOG_NAME, llstr(mi->master_log_pos,llbuff));
2456
  VOID(pthread_mutex_lock(&LOCK_thread_count));
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2457
  thd->query = thd->db = 0; // extra safety
2458
  thd->query_length= thd->db_length= 0;
2459
  VOID(pthread_mutex_unlock(&LOCK_thread_count));
2460 2461
  if (mysql)
  {
2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472
    /*
      Here we need to clear the active VIO before closing the
      connection with the master.  The reason is that THD::awake()
      might be called from terminate_slave_thread() because somebody
      issued a STOP SLAVE.  If that happends, the close_active_vio()
      can be called in the middle of closing the VIO associated with
      the 'mysql' object, causing a crash.
    */
#ifdef SIGNAL_WITH_VIO_CLOSE
    thd->clear_active_vio();
#endif
hf@deer.mysql.r18.ru's avatar
SCRUM  
hf@deer.mysql.r18.ru committed
2473
    mysql_close(mysql);
2474 2475
    mi->mysql=0;
  }
2476
  write_ignored_events_info_to_relay_log(thd, mi);
2477
  thd_proc_info(thd, "Waiting for slave mutex on exit");
2478
  pthread_mutex_lock(&mi->run_lock);
2479

2480 2481 2482
  /* Forget the relay log's format */
  delete mi->rli.relay_log.description_event_for_queue;
  mi->rli.relay_log.description_event_for_queue= 0;
2483
  // TODO: make rpl_status part of Master_info
2484
  change_rpl_status(RPL_ACTIVE_SLAVE,RPL_IDLE_SLAVE);
2485
  DBUG_ASSERT(thd->net.buff != 0);
2486
  net_end(&thd->net); // destructor will not free it, because net.vio is 0
2487
  close_thread_tables(thd);
2488
  pthread_mutex_lock(&LOCK_thread_count);
2489
  THD_CHECK_SENTRY(thd);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2490
  delete thd;
2491
  pthread_mutex_unlock(&LOCK_thread_count);
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2492 2493 2494
  mi->abort_slave= 0;
  mi->slave_running= 0;
  mi->io_thd= 0;
2495 2496 2497 2498 2499
  /*
    Note: the order of the two following calls (first broadcast, then unlock)
    is important. Otherwise a killer_thread can execute between the calls and
    delete the mi structure leading to a crash! (see BUG#25306 for details)
   */ 
2500
  pthread_cond_broadcast(&mi->stop_cond);       // tell the world we are done
2501
  pthread_mutex_unlock(&mi->run_lock);
2502
  my_thread_end();
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2503
  pthread_exit(0);
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2504
  DBUG_RETURN(0);                               // Can't return anything here
bk@work.mysql.com's avatar
bk@work.mysql.com committed
2505 2506
}

monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2507

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
2508
/* Slave SQL Thread entry point */
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
2509

2510
pthread_handler_t handle_slave_sql(void *arg)
2511
{
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2512
  THD *thd;                     /* needs to be first for thread_stack */
2513
  char llbuff[22],llbuff1[22];
2514

2515
  Relay_log_info* rli = &((Master_info*)arg)->rli;
2516 2517 2518 2519
  const char *errmsg;

  // needs to call my_thread_init(), otherwise we get a coredump in DBUG_ stuff
  my_thread_init();
2520
  DBUG_ENTER("handle_slave_sql");
2521

2522 2523 2524
  DBUG_ASSERT(rli->inited);
  pthread_mutex_lock(&rli->run_lock);
  DBUG_ASSERT(!rli->slave_running);
2525
  errmsg= 0;
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2526
#ifndef DBUG_OFF
2527
  rli->events_till_abort = abort_slave_event_count;
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2528
#endif
2529

2530
  thd = new THD; // note that contructor of THD uses DBUG_ !
2531
  thd->thread_stack = (char*)&thd; // remember where our stack is
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2532

2533 2534 2535
  /* Inform waiting threads that slave has started */
  rli->slave_run_id++;

2536 2537
  pthread_detach_this_thread();
  if (init_slave_thread(thd, SLAVE_THD_SQL))
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2538 2539 2540 2541 2542 2543 2544 2545 2546 2547
  {
    /*
      TODO: this is currently broken - slave start and change master
      will be stuck if we fail here
    */
    pthread_cond_broadcast(&rli->start_cond);
    pthread_mutex_unlock(&rli->run_lock);
    sql_print_error("Failed during slave thread initialization");
    goto err;
  }
2548
  thd->init_for_queries();
2549
  rli->sql_thd= thd;
2550
  thd->temporary_tables = rli->save_temporary_tables; // restore temp tables
2551
  pthread_mutex_lock(&LOCK_thread_count);
2552
  threads.append(thd);
2553
  pthread_mutex_unlock(&LOCK_thread_count);
2554 2555 2556 2557 2558 2559 2560 2561
  /*
    We are going to set slave_running to 1. Assuming slave I/O thread is
    alive and connected, this is going to make Seconds_Behind_Master be 0
    i.e. "caught up". Even if we're just at start of thread. Well it's ok, at
    the moment we start we can think we are caught up, and the next second we
    start receiving data so we realize we are not caught up and
    Seconds_Behind_Master grows. No big deal.
  */
2562 2563 2564
  rli->slave_running = 1;
  rli->abort_slave = 0;
  pthread_mutex_unlock(&rli->run_lock);
2565
  pthread_cond_broadcast(&rli->start_cond);
2566

guilhem@mysql.com's avatar
guilhem@mysql.com committed
2567 2568 2569
  /*
    Reset errors for a clean start (otherwise, if the master is idle, the SQL
    thread may execute no Query_log_event, so the error will remain even
guilhem@gbichot2's avatar
guilhem@gbichot2 committed
2570
    though there's no problem anymore). Do not reset the master timestamp
2571 2572 2573 2574
    (imagine the slave has caught everything, the STOP SLAVE and START SLAVE:
    as we are not sure that we are going to receive a query, we want to
    remember the last master timestamp (to say how many seconds behind we are
    now.
guilhem@gbichot2's avatar
guilhem@gbichot2 committed
2575
    But the master timestamp is reset by RESET SLAVE & CHANGE MASTER.
guilhem@mysql.com's avatar
guilhem@mysql.com committed
2576
  */
2577
  rli->clear_error();
2578 2579

  //tell the I/O thread to take relay_log_space_limit into account from now on
2580
  pthread_mutex_lock(&rli->log_space_lock);
2581
  rli->ignore_log_space_limit= 0;
2582
  pthread_mutex_unlock(&rli->log_space_lock);
2583
  rli->trans_retries= 0; // start from "no error"
2584
  DBUG_PRINT("info", ("rli->trans_retries: %lu", rli->trans_retries));
2585

2586
  if (init_relay_log_pos(rli,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2587 2588 2589
                         rli->group_relay_log_name,
                         rli->group_relay_log_pos,
                         1 /*need data lock*/, &errmsg,
2590
                         1 /*look for a description_event*/))
2591 2592
  {
    sql_print_error("Error initializing relay log position: %s",
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2593
                    errmsg);
2594 2595
    goto err;
  }
2596
  THD_CHECK_SENTRY(thd);
2597 2598 2599 2600
#ifndef DBUG_OFF
  {
    char llbuf1[22], llbuf2[22];
    DBUG_PRINT("info", ("my_b_tell(rli->cur_log)=%s rli->event_relay_log_pos=%s",
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2601
                        llstr(my_b_tell(rli->cur_log),llbuf1),
2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620
                        llstr(rli->event_relay_log_pos,llbuf2)));
    DBUG_ASSERT(rli->event_relay_log_pos >= BIN_LOG_HEADER_SIZE);
    /*
      Wonder if this is correct. I (Guilhem) wonder if my_b_tell() returns the
      correct position when it's called just after my_b_seek() (the questionable
      stuff is those "seek is done on next read" comments in the my_b_seek()
      source code).
      The crude reality is that this assertion randomly fails whereas
      replication seems to work fine. And there is no easy explanation why it
      fails (as we my_b_seek(rli->event_relay_log_pos) at the very end of
      init_relay_log_pos() called above). Maybe the assertion would be
      meaningful if we held rli->data_lock between the my_b_seek() and the
      DBUG_ASSERT().
    */
#ifdef SHOULD_BE_CHECKED
    DBUG_ASSERT(my_b_tell(rli->cur_log) == rli->event_relay_log_pos);
#endif
  }
#endif
2621
  DBUG_ASSERT(rli->sql_thd == thd);
2622 2623

  DBUG_PRINT("master_info",("log_file_name: %s  position: %s",
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2624 2625
                            rli->group_master_log_name,
                            llstr(rli->group_master_log_pos,llbuff)));
2626
  if (global_system_variables.log_warnings)
2627
    sql_print_information("Slave SQL thread initialized, starting replication in \
2628
log '%s' at position %s, relay log '%s' position: %s", RPL_LOG_NAME,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2629 2630
                    llstr(rli->group_master_log_pos,llbuff),rli->group_relay_log_name,
                    llstr(rli->group_relay_log_pos,llbuff1));
2631

gluh@gluh.mysql.r18.ru's avatar
gluh@gluh.mysql.r18.ru committed
2632
  /* execute init_slave variable */
2633
  if (sys_init_slave.value_length)
gluh@gluh.mysql.r18.ru's avatar
gluh@gluh.mysql.r18.ru committed
2634
  {
2635
    execute_init_command(thd, &sys_init_slave, &LOCK_sys_init_slave);
2636
    if (thd->is_slave_error)
gluh@gluh.mysql.r18.ru's avatar
gluh@gluh.mysql.r18.ru committed
2637 2638 2639 2640 2641 2642 2643
    {
      sql_print_error("\
Slave SQL thread aborted. Can't execute init_slave query");
      goto err;
    }
  }

2644 2645
  /* Read queries from the IO/THREAD until this thread is killed */

2646
  while (!sql_slave_killed(thd,rli))
2647
  {
2648
    thd_proc_info(thd, "Reading event from the relay log");
2649
    DBUG_ASSERT(rli->sql_thd == thd);
2650
    THD_CHECK_SENTRY(thd);
2651 2652
    if (exec_relay_log_event(thd,rli))
    {
2653
      DBUG_PRINT("info", ("exec_relay_log_event() failed"));
2654
      // do not scare the user if SQL thread was simply killed or stopped
2655
      if (!sql_slave_killed(thd,rli))
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
2656 2657
      {
        /*
2658 2659 2660
          retrieve as much info as possible from the thd and, error
          codes and warnings and print this to the error log as to
          allow the user to locate the error
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
2661
        */
2662
        uint32 const last_errno= rli->last_error().number;
2663

2664
        if (thd->is_error())
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
2665
        {
2666 2667 2668 2669 2670
          char const *const errmsg= thd->main_da.message();

          DBUG_PRINT("info",
                     ("thd->main_da.sql_errno()=%d; rli->last_error.number=%d",
                      thd->main_da.sql_errno(), last_errno));
2671
          if (last_errno == 0)
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
2672
          {
2673
            rli->report(ERROR_LEVEL, thd->main_da.sql_errno(), errmsg);
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
2674
          }
2675
          else if (last_errno != thd->main_da.sql_errno())
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
2676 2677
          {
            sql_print_error("Slave (additional info): %s Error_code: %d",
2678
                            errmsg, thd->main_da.sql_errno());
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
2679 2680 2681 2682 2683 2684
          }
        }

        /* Print any warnings issued */
        List_iterator_fast<MYSQL_ERROR> it(thd->warn_list);
        MYSQL_ERROR *err;
2685 2686 2687 2688 2689
        /*
          Added controlled slave thread cancel for replication
          of user-defined variables.
        */
        bool udf_error = false;
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
2690
        while ((err= it++))
2691 2692 2693
        {
          if (err->code == ER_CANT_OPEN_LIBRARY)
            udf_error = true;
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
2694
          sql_print_warning("Slave: %s Error_code: %d",err->msg, err->code);
2695 2696 2697 2698 2699 2700 2701 2702 2703
        }
        if (udf_error)
          sql_print_error("Error loading user-defined library, slave SQL "
            "thread aborted. Install the missing library, and restart the "
            "slave SQL thread with \"SLAVE START\". We stopped at log '%s' "
            "position %s", RPL_LOG_NAME, llstr(rli->group_master_log_pos, 
            llbuff));
        else
          sql_print_error("\
2704
Error running query, slave SQL thread aborted. Fix the problem, and restart \
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2705
the slave SQL thread with \"SLAVE START\". We stopped at log \
2706
'%s' position %s", RPL_LOG_NAME, llstr(rli->group_master_log_pos, llbuff));
tomas@poseidon.ndb.mysql.com's avatar
tomas@poseidon.ndb.mysql.com committed
2707
      }
2708 2709
      goto err;
    }
2710
  }
2711

2712
  /* Thread stopped. Print the current replication position to the log */
monty@mysql.com's avatar
monty@mysql.com committed
2713
  sql_print_information("Slave SQL thread exiting, replication stopped in log "
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2714 2715
                        "'%s' at position %s",
                        RPL_LOG_NAME, llstr(rli->group_master_log_pos,llbuff));
2716 2717

 err:
2718 2719 2720 2721 2722 2723 2724 2725

  /*
    Some events set some playgrounds, which won't be cleared because thread
    stops. Stopping of this thread may not be known to these events ("stop"
    request is detected only by the present function, not by events), so we
    must "proactively" clear playgrounds:
  */
  rli->cleanup_context(thd, 1);
2726
  VOID(pthread_mutex_lock(&LOCK_thread_count));
2727 2728 2729 2730 2731
  /*
    Some extra safety, which should not been needed (normally, event deletion
    should already have done these assignments (each event which sets these
    variables is supposed to set them to 0 before terminating)).
  */
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2732
  thd->query= thd->db= thd->catalog= 0;
2733
  thd->query_length= thd->db_length= 0;
2734
  VOID(pthread_mutex_unlock(&LOCK_thread_count));
2735
  thd_proc_info(thd, "Waiting for slave mutex on exit");
2736
  pthread_mutex_lock(&rli->run_lock);
2737 2738
  /* We need data_lock, at least to wake up any waiting master_pos_wait() */
  pthread_mutex_lock(&rli->data_lock);
2739
  DBUG_ASSERT(rli->slave_running == 1); // tracking buffer overrun
2740
  /* When master_pos_wait() wakes up it will check this and terminate */
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2741
  rli->slave_running= 0;
2742 2743 2744
  /* Forget the relay log's format */
  delete rli->relay_log.description_event_for_exec;
  rli->relay_log.description_event_for_exec= 0;
2745 2746 2747 2748
  /* Wake up master_pos_wait() */
  pthread_mutex_unlock(&rli->data_lock);
  DBUG_PRINT("info",("Signaling possibly waiting master_pos_wait() functions"));
  pthread_cond_broadcast(&rli->data_cond);
guilhem@mysql.com's avatar
guilhem@mysql.com committed
2749
  rli->ignore_log_space_limit= 0; /* don't need any lock */
2750 2751
  /* we die so won't remember charset - re-update them on next thread start */
  rli->cached_charset_invalidate();
2752
  rli->save_temporary_tables = thd->temporary_tables;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2753 2754 2755 2756 2757

  /*
    TODO: see if we can do this conditionally in next_event() instead
    to avoid unneeded position re-init
  */
2758 2759 2760 2761
  thd->temporary_tables = 0; // remove tempation from destructor to close them
  DBUG_ASSERT(thd->net.buff != 0);
  net_end(&thd->net); // destructor will not free it, because we are weird
  DBUG_ASSERT(rli->sql_thd == thd);
2762
  THD_CHECK_SENTRY(thd);
2763
  rli->sql_thd= 0;
2764
  pthread_mutex_lock(&LOCK_thread_count);
2765
  THD_CHECK_SENTRY(thd);
2766 2767
  delete thd;
  pthread_mutex_unlock(&LOCK_thread_count);
2768 2769 2770 2771 2772
 /*
  Note: the order of the broadcast and unlock calls below (first broadcast, then unlock)
  is important. Otherwise a killer_thread can execute between the calls and
  delete the mi structure leading to a crash! (see BUG#25306 for details)
 */ 
2773
  pthread_cond_broadcast(&rli->stop_cond);
2774 2775
  pthread_mutex_unlock(&rli->run_lock);  // tell the world we are done
  
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
2776
  my_thread_end();
2777
  pthread_exit(0);
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2778
  DBUG_RETURN(0);                               // Can't return anything here
2779
}
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
2780

2781

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
2782
/*
2783
  process_io_create_file()
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
2784
*/
2785

2786
static int process_io_create_file(Master_info* mi, Create_file_log_event* cev)
2787 2788 2789 2790
{
  int error = 1;
  ulong num_bytes;
  bool cev_not_written;
2791 2792
  THD *thd = mi->io_thd;
  NET *net = &mi->mysql->net;
2793
  DBUG_ENTER("process_io_create_file");
2794 2795

  if (unlikely(!cev->is_valid()))
2796
    DBUG_RETURN(1);
2797 2798

  if (!rpl_filter->db_ok(cev->db))
2799 2800
  {
    skip_load_data_infile(net);
2801
    DBUG_RETURN(0);
2802 2803 2804
  }
  DBUG_ASSERT(cev->inited_from_old);
  thd->file_id = cev->file_id = mi->file_id++;
2805
  thd->server_id = cev->server_id;
2806
  cev_not_written = 1;
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2807

2808 2809 2810
  if (unlikely(net_request_file(net,cev->fname)))
  {
    sql_print_error("Slave I/O: failed requesting download of '%s'",
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2811
                    cev->fname);
2812 2813 2814
    goto err;
  }

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
2815 2816 2817 2818
  /*
    This dummy block is so we could instantiate Append_block_log_event
    once and then modify it slightly instead of doing it multiple times
    in the loop
2819 2820
  */
  {
2821
    Append_block_log_event aev(thd,0,0,0,0);
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2822

2823 2824 2825 2826
    for (;;)
    {
      if (unlikely((num_bytes=my_net_read(net)) == packet_error))
      {
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2827 2828 2829
        sql_print_error("Network read error downloading '%s' from master",
                        cev->fname);
        goto err;
2830 2831 2832
      }
      if (unlikely(!num_bytes)) /* eof */
      {
2833 2834
	/* 3.23 master wants it */
        net_write_command(net, 0, (uchar*) "", 0, (uchar*) "", 0);
monty@mysql.com's avatar
monty@mysql.com committed
2835 2836 2837 2838 2839 2840 2841
        /*
          If we wrote Create_file_log_event, then we need to write
          Execute_load_log_event. If we did not write Create_file_log_event,
          then this is an empty file and we can just do as if the LOAD DATA
          INFILE had not existed, i.e. write nothing.
        */
        if (unlikely(cev_not_written))
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2842 2843 2844 2845 2846
          break;
        Execute_load_log_event xev(thd,0,0);
        xev.log_pos = cev->log_pos;
        if (unlikely(mi->rli.relay_log.append(&xev)))
        {
2847 2848 2849
          mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE,
                     ER(ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
                     "error writing Exec_load event to relay log");
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2850 2851 2852 2853
          goto err;
        }
        mi->rli.relay_log.harvest_bytes_written(&mi->rli.log_space_total);
        break;
2854 2855 2856
      }
      if (unlikely(cev_not_written))
      {
2857
        cev->block = net->read_pos;
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2858 2859 2860
        cev->block_len = num_bytes;
        if (unlikely(mi->rli.relay_log.append(cev)))
        {
2861 2862 2863
          mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE,
                     ER(ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
                     "error writing Create_file event to relay log");
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2864 2865 2866 2867
          goto err;
        }
        cev_not_written=0;
        mi->rli.relay_log.harvest_bytes_written(&mi->rli.log_space_total);
2868 2869 2870
      }
      else
      {
2871
        aev.block = net->read_pos;
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2872 2873 2874 2875
        aev.block_len = num_bytes;
        aev.log_pos = cev->log_pos;
        if (unlikely(mi->rli.relay_log.append(&aev)))
        {
2876 2877 2878
          mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE,
                     ER(ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
                     "error writing Append_block event to relay log");
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2879 2880 2881
          goto err;
        }
        mi->rli.relay_log.harvest_bytes_written(&mi->rli.log_space_total) ;
2882 2883 2884 2885 2886
      }
    }
  }
  error=0;
err:
2887
  DBUG_RETURN(error);
2888
}
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2889

2890

2891
/*
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
2892 2893 2894 2895
  Start using a new binary log on the master

  SYNOPSIS
    process_io_rotate()
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2896 2897
    mi                  master_info for the slave
    rev                 The rotate log event read from the binary log
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
2898 2899

  DESCRIPTION
2900
    Updates the master info with the place in the next binary
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
2901
    log where we should start reading.
2902
    Rotate the relay log to avoid mixed-format relay logs.
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
2903 2904 2905 2906 2907

  NOTES
    We assume we already locked mi->data_lock

  RETURN VALUES
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2908 2909
    0           ok
    1           Log event is illegal
2910 2911 2912

*/

2913
static int process_io_rotate(Master_info *mi, Rotate_log_event *rev)
2914
{
2915
  DBUG_ENTER("process_io_rotate");
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
2916
  safe_mutex_assert_owner(&mi->data_lock);
2917

2918
  if (unlikely(!rev->is_valid()))
2919
    DBUG_RETURN(1);
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
2920

2921
  /* Safe copy as 'rev' has been "sanitized" in Rotate_log_event's ctor */
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
2922 2923
  memcpy(mi->master_log_name, rev->new_log_ident, rev->ident_len+1);
  mi->master_log_pos= rev->pos;
2924
  DBUG_PRINT("info", ("master_log_pos: '%s' %lu",
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2925
                      mi->master_log_name, (ulong) mi->master_log_pos));
2926
#ifndef DBUG_OFF
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2927 2928 2929 2930 2931
  /*
    If we do not do this, we will be getting the first
    rotate event forever, so we need to not disconnect after one.
  */
  if (disconnect_slave_event_count)
2932
    mi->events_till_disconnect++;
2933
#endif
2934

2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948
  /*
    If description_event_for_queue is format <4, there is conversion in the
    relay log to the slave's format (4). And Rotate can mean upgrade or
    nothing. If upgrade, it's to 5.0 or newer, so we will get a Format_desc, so
    no need to reset description_event_for_queue now. And if it's nothing (same
    master version as before), no need (still using the slave's format).
  */
  if (mi->rli.relay_log.description_event_for_queue->binlog_version >= 4)
  {
    delete mi->rli.relay_log.description_event_for_queue;
    /* start from format 3 (MySQL 4.0) again */
    mi->rli.relay_log.description_event_for_queue= new
      Format_description_log_event(3);
  }
2949 2950 2951 2952
  /*
    Rotate the relay log makes binlog format detection easier (at next slave
    start or mysqlbinlog)
  */
2953
  rotate_relay_log(mi); /* will take the right mutexes */
2954
  DBUG_RETURN(0);
2955 2956
}

monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2957
/*
2958 2959
  Reads a 3.23 event and converts it to the slave's format. This code was
  copied from MySQL 4.0.
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2960
*/
2961
static int queue_binlog_ver_1_event(Master_info *mi, const char *buf,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
2962
                           ulong event_len)
2963
{
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2964
  const char *errmsg = 0;
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
2965 2966 2967
  ulong inc_pos;
  bool ignore_event= 0;
  char *tmp_buf = 0;
2968
  Relay_log_info *rli= &mi->rli;
2969
  DBUG_ENTER("queue_binlog_ver_1_event");
2970

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
2971 2972 2973
  /*
    If we get Load event, we need to pass a non-reusable buffer
    to read_log_event, so we do a trick
2974 2975 2976 2977 2978
  */
  if (buf[EVENT_TYPE_OFFSET] == LOAD_EVENT)
  {
    if (unlikely(!(tmp_buf=(char*)my_malloc(event_len+1,MYF(MY_WME)))))
    {
2979 2980
      mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
                 ER(ER_SLAVE_FATAL_ERROR), "Memory allocation failed");
2981
      DBUG_RETURN(1);
2982 2983
    }
    memcpy(tmp_buf,buf,event_len);
2984 2985 2986 2987 2988 2989 2990 2991
    /*
      Create_file constructor wants a 0 as last char of buffer, this 0 will
      serve as the string-termination char for the file's name (which is at the
      end of the buffer)
      We must increment event_len, otherwise the event constructor will not see
      this end 0, which leads to segfault.
    */
    tmp_buf[event_len++]=0;
2992
    int4store(tmp_buf+EVENT_LEN_OFFSET, event_len);
2993 2994
    buf = (const char*)tmp_buf;
  }
2995 2996 2997 2998 2999 3000
  /*
    This will transform LOAD_EVENT into CREATE_FILE_EVENT, ask the master to
    send the loaded file, and write it to the relay log in the form of
    Append_block/Exec_load (the SQL thread needs the data, as that thread is not
    connected to the master).
  */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3001
  Log_event *ev = Log_event::read_log_event(buf,event_len, &errmsg,
3002
                                            mi->rli.relay_log.description_event_for_queue);
3003
  if (unlikely(!ev))
3004 3005
  {
    sql_print_error("Read invalid event from master: '%s',\
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3006
 master could be corrupt but a more likely cause of this is a bug",
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3007
                    errmsg);
3008 3009
    my_free((char*) tmp_buf, MYF(MY_ALLOW_ZERO_PTR));
    DBUG_RETURN(1);
3010
  }
3011

3012
  pthread_mutex_lock(&mi->data_lock);
3013
  ev->log_pos= mi->master_log_pos; /* 3.23 events don't contain log_pos */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3014
  switch (ev->get_type_code()) {
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
3015
  case STOP_EVENT:
3016
    ignore_event= 1;
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
3017 3018
    inc_pos= event_len;
    break;
3019
  case ROTATE_EVENT:
3020
    if (unlikely(process_io_rotate(mi,(Rotate_log_event*)ev)))
3021 3022
    {
      delete ev;
3023
      pthread_mutex_unlock(&mi->data_lock);
3024
      DBUG_RETURN(1);
3025
    }
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
3026
    inc_pos= 0;
3027
    break;
3028
  case CREATE_FILE_EVENT:
3029 3030 3031 3032 3033 3034
    /*
      Yes it's possible to have CREATE_FILE_EVENT here, even if we're in
      queue_old_event() which is for 3.23 events which don't comprise
      CREATE_FILE_EVENT. This is because read_log_event() above has just
      transformed LOAD_EVENT into CREATE_FILE_EVENT.
    */
3035
  {
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
3036
    /* We come here when and only when tmp_buf != 0 */
3037
    DBUG_ASSERT(tmp_buf != 0);
3038 3039
    inc_pos=event_len;
    ev->log_pos+= inc_pos;
3040
    int error = process_io_create_file(mi,(Create_file_log_event*)ev);
3041
    delete ev;
3042
    mi->master_log_pos += inc_pos;
3043
    DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->master_log_pos));
3044
    pthread_mutex_unlock(&mi->data_lock);
3045
    my_free((char*)tmp_buf, MYF(0));
3046
    DBUG_RETURN(error);
3047
  }
3048
  default:
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
3049
    inc_pos= event_len;
3050 3051
    break;
  }
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
3052
  if (likely(!ignore_event))
3053
  {
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3054 3055
    if (ev->log_pos)
      /*
3056 3057 3058 3059
         Don't do it for fake Rotate events (see comment in
      Log_event::Log_event(const char* buf...) in log_event.cc).
      */
      ev->log_pos+= event_len; /* make log_pos be the pos of the end of the event */
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
3060
    if (unlikely(rli->relay_log.append(ev)))
3061 3062 3063
    {
      delete ev;
      pthread_mutex_unlock(&mi->data_lock);
3064
      DBUG_RETURN(1);
3065
    }
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
3066
    rli->relay_log.harvest_bytes_written(&rli->log_space_total);
3067 3068
  }
  delete ev;
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
3069
  mi->master_log_pos+= inc_pos;
3070
  DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->master_log_pos));
3071
  pthread_mutex_unlock(&mi->data_lock);
3072
  DBUG_RETURN(0);
3073 3074
}

3075 3076 3077 3078
/*
  Reads a 4.0 event and converts it to the slave's format. This code was copied
  from queue_binlog_ver_1_event(), with some affordable simplifications.
*/
3079
static int queue_binlog_ver_3_event(Master_info *mi, const char *buf,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3080
                           ulong event_len)
3081 3082 3083 3084
{
  const char *errmsg = 0;
  ulong inc_pos;
  char *tmp_buf = 0;
3085
  Relay_log_info *rli= &mi->rli;
3086 3087 3088 3089 3090 3091 3092 3093 3094
  DBUG_ENTER("queue_binlog_ver_3_event");

  /* read_log_event() will adjust log_pos to be end_log_pos */
  Log_event *ev = Log_event::read_log_event(buf,event_len, &errmsg,
                                            mi->rli.relay_log.description_event_for_queue);
  if (unlikely(!ev))
  {
    sql_print_error("Read invalid event from master: '%s',\
 master could be corrupt but a more likely cause of this is a bug",
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3095
                    errmsg);
3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125
    my_free((char*) tmp_buf, MYF(MY_ALLOW_ZERO_PTR));
    DBUG_RETURN(1);
  }
  pthread_mutex_lock(&mi->data_lock);
  switch (ev->get_type_code()) {
  case STOP_EVENT:
    goto err;
  case ROTATE_EVENT:
    if (unlikely(process_io_rotate(mi,(Rotate_log_event*)ev)))
    {
      delete ev;
      pthread_mutex_unlock(&mi->data_lock);
      DBUG_RETURN(1);
    }
    inc_pos= 0;
    break;
  default:
    inc_pos= event_len;
    break;
  }
  if (unlikely(rli->relay_log.append(ev)))
  {
    delete ev;
    pthread_mutex_unlock(&mi->data_lock);
    DBUG_RETURN(1);
  }
  rli->relay_log.harvest_bytes_written(&rli->log_space_total);
  delete ev;
  mi->master_log_pos+= inc_pos;
err:
3126
  DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->master_log_pos));
3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137
  pthread_mutex_unlock(&mi->data_lock);
  DBUG_RETURN(0);
}

/*
  queue_old_event()

  Writes a 3.23 or 4.0 event to the relay log, after converting it to the 5.0
  (exactly, slave's) format. To do the conversion, we create a 5.0 event from
  the 3.23/4.0 bytes, then write this event to the relay log.

grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3138
  TODO:
3139 3140 3141 3142
    Test this code before release - it has to be tested on a separate
    setup with 3.23 master or 4.0 master
*/

3143
static int queue_old_event(Master_info *mi, const char *buf,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3144
                           ulong event_len)
3145
{
3146 3147
  DBUG_ENTER("queue_old_event");

3148 3149 3150
  switch (mi->rli.relay_log.description_event_for_queue->binlog_version)
  {
  case 1:
3151
      DBUG_RETURN(queue_binlog_ver_1_event(mi,buf,event_len));
3152
  case 3:
3153
      DBUG_RETURN(queue_binlog_ver_3_event(mi,buf,event_len));
3154 3155
  default: /* unsupported format; eg version 2 */
    DBUG_PRINT("info",("unsupported binlog format %d in queue_old_event()",
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3156
                       mi->rli.relay_log.description_event_for_queue->binlog_version));
3157
    DBUG_RETURN(1);
3158 3159
  }
}
3160

monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3161
/*
3162 3163
  queue_event()

3164 3165 3166 3167 3168
  If the event is 3.23/4.0, passes it to queue_old_event() which will convert
  it. Otherwise, writes a 5.0 (or newer) event to the relay log. Then there is
  no format conversion, it's pure read/write of bytes.
  So a 5.0.0 slave's relay log can contain events in the slave's format or in
  any >=5.0.0 format.
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3169 3170
*/

3171
static int queue_event(Master_info* mi,const char* buf, ulong event_len)
3172
{
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
3173 3174
  int error= 0;
  ulong inc_pos;
3175
  Relay_log_info *rli= &mi->rli;
3176
  pthread_mutex_t *log_lock= rli->relay_log.get_log_lock();
3177 3178
  DBUG_ENTER("queue_event");

3179 3180
  LINT_INIT(inc_pos);

3181 3182
  if (mi->rli.relay_log.description_event_for_queue->binlog_version<4 &&
      buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT /* a way to escape */)
3183
    DBUG_RETURN(queue_old_event(mi,buf,event_len));
3184

3185
  LINT_INIT(inc_pos);
3186
  pthread_mutex_lock(&mi->data_lock);
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
3187

monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3188
  switch (buf[EVENT_TYPE_OFFSET]) {
3189
  case STOP_EVENT:
3190 3191
    /*
      We needn't write this event to the relay log. Indeed, it just indicates a
monty@narttu.mysql.fi's avatar
monty@narttu.mysql.fi committed
3192 3193 3194
      master server shutdown. The only thing this does is cleaning. But
      cleaning is already done on a per-master-thread basis (as the master
      server is shutting down cleanly, it has written all DROP TEMPORARY TABLE
3195
      prepared statements' deletion are TODO only when we binlog prep stmts).
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3196

monty@narttu.mysql.fi's avatar
monty@narttu.mysql.fi committed
3197 3198 3199 3200
      We don't even increment mi->master_log_pos, because we may be just after
      a Rotate event. Btw, in a few milliseconds we are going to have a Start
      event from the next binlog (unless the master is presently running
      without --log-bin).
3201 3202
    */
    goto err;
3203 3204
  case ROTATE_EVENT:
  {
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3205
    Rotate_log_event rev(buf,event_len,mi->rli.relay_log.description_event_for_queue);
3206
    if (unlikely(process_io_rotate(mi,&rev)))
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
3207
    {
3208 3209
      error= 1;
      goto err;
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
3210
    }
3211 3212 3213 3214
    /*
      Now the I/O thread has just changed its mi->master_log_name, so
      incrementing mi->master_log_pos is nonsense.
    */
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
3215
    inc_pos= 0;
3216 3217
    break;
  }
3218 3219 3220 3221 3222 3223 3224
  case FORMAT_DESCRIPTION_EVENT:
  {
    /*
      Create an event, and save it (when we rotate the relay log, we will have
      to write this event again).
    */
    /*
3225 3226 3227
      We are the only thread which reads/writes description_event_for_queue.
      The relay_log struct does not move (though some members of it can
      change), so we needn't any lock (no rli->data_lock, no log lock).
3228
    */
3229
    Format_description_log_event* tmp;
3230
    const char* errmsg;
3231
    if (!(tmp= (Format_description_log_event*)
3232
          Log_event::read_log_event(buf, event_len, &errmsg,
3233
                                    mi->rli.relay_log.description_event_for_queue)))
3234 3235 3236 3237
    {
      error= 2;
      goto err;
    }
3238 3239
    delete mi->rli.relay_log.description_event_for_queue;
    mi->rli.relay_log.description_event_for_queue= tmp;
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3240
    /*
3241
       Though this does some conversion to the slave's format, this will
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3242
       preserve the master's binlog format version, and number of event types.
3243
    */
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3244
    /*
3245
       If the event was not requested by the slave (the slave did not ask for
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3246
       it), i.e. has end_log_pos=0, we do not increment mi->master_log_pos
3247 3248 3249
    */
    inc_pos= uint4korr(buf+LOG_POS_OFFSET) ? event_len : 0;
    DBUG_PRINT("info",("binlog format is now %d",
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3250
                       mi->rli.relay_log.description_event_for_queue->binlog_version));
3251 3252 3253

  }
  break;
3254
  default:
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
3255
    inc_pos= event_len;
3256 3257
    break;
  }
3258

grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3259 3260
  /*
     If this event is originating from this server, don't queue it.
3261
     We don't check this for 3.23 events because it's simpler like this; 3.23
monty@narttu.mysql.fi's avatar
monty@narttu.mysql.fi committed
3262 3263
     will be filtered anyway by the SQL slave thread which also tests the
     server id (we must also keep this test in the SQL thread, in case somebody
3264 3265 3266 3267 3268 3269 3270 3271
     upgrades a 4.0 slave which has a not-filtered relay log).

     ANY event coming from ourselves can be ignored: it is obvious for queries;
     for STOP_EVENT/ROTATE_EVENT/START_EVENT: these cannot come from ourselves
     (--log-slave-updates would not log that) unless this slave is also its
     direct master (an unsupported, useless setup!).
  */

3272 3273
  pthread_mutex_lock(log_lock);

guilhem@mysql.com's avatar
guilhem@mysql.com committed
3274
  if ((uint4korr(buf + SERVER_ID_OFFSET) == ::server_id) &&
3275
      !mi->rli.replicate_same_server_id)
3276
  {
3277 3278
    /*
      Do not write it to the relay log.
3279 3280 3281 3282 3283 3284 3285 3286
      a) We still want to increment mi->master_log_pos, so that we won't
      re-read this event from the master if the slave IO thread is now
      stopped/restarted (more efficient if the events we are ignoring are big
      LOAD DATA INFILE).
      b) We want to record that we are skipping events, for the information of
      the slave SQL thread, otherwise that thread may let
      rli->group_relay_log_pos stay too small if the last binlog's event is
      ignored.
3287 3288 3289
      But events which were generated by this slave and which do not exist in
      the master's binlog (i.e. Format_desc, Rotate & Stop) should not increment
      mi->master_log_pos.
3290
    */
3291 3292 3293
    if (buf[EVENT_TYPE_OFFSET]!=FORMAT_DESCRIPTION_EVENT &&
        buf[EVENT_TYPE_OFFSET]!=ROTATE_EVENT &&
        buf[EVENT_TYPE_OFFSET]!=STOP_EVENT)
3294
    {
3295
      mi->master_log_pos+= inc_pos;
3296 3297 3298 3299
      memcpy(rli->ign_master_log_name_end, mi->master_log_name, FN_REFLEN);
      DBUG_ASSERT(rli->ign_master_log_name_end[0]);
      rli->ign_master_log_pos_end= mi->master_log_pos;
    }
3300
    rli->relay_log.signal_update(); // the slave SQL thread needs to re-check
3301
    DBUG_PRINT("info", ("master_log_pos: %lu, event originating from the same server, ignored",
3302
                        (ulong) mi->master_log_pos));
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3303
  }
3304 3305 3306
  else
  {
    /* write the event to the relay log */
3307
    if (likely(!(rli->relay_log.appendv(buf,event_len,0))))
3308 3309
    {
      mi->master_log_pos+= inc_pos;
3310
      DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->master_log_pos));
3311 3312
      rli->relay_log.harvest_bytes_written(&rli->log_space_total);
    }
3313
    else
3314
      error= 3;
3315
    rli->ign_master_log_name_end[0]= 0; // last event is not ignored
3316
  }
3317
  pthread_mutex_unlock(log_lock);
3318 3319 3320


err:
3321
  pthread_mutex_unlock(&mi->data_lock);
3322
  DBUG_PRINT("info", ("error: %d", error));
3323
  DBUG_RETURN(error);
3324 3325
}

3326

3327
void end_relay_log_info(Relay_log_info* rli)
3328
{
3329 3330
  DBUG_ENTER("end_relay_log_info");

3331
  if (!rli->inited)
3332
    DBUG_VOID_RETURN;
3333
  if (rli->info_fd >= 0)
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3334 3335
  {
    end_io_cache(&rli->info_file);
3336
    (void) my_close(rli->info_fd, MYF(MY_WME));
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3337 3338
    rli->info_fd = -1;
  }
3339
  if (rli->cur_log_fd >= 0)
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3340 3341 3342 3343 3344
  {
    end_io_cache(&rli->cache_buf);
    (void)my_close(rli->cur_log_fd, MYF(MY_WME));
    rli->cur_log_fd = -1;
  }
3345
  rli->inited = 0;
3346
  rli->relay_log.close(LOG_CLOSE_INDEX | LOG_CLOSE_STOP_EVENT);
3347
  rli->relay_log.harvest_bytes_written(&rli->log_space_total);
guilhem@mysql.com's avatar
guilhem@mysql.com committed
3348 3349 3350 3351 3352 3353
  /*
    Delete the slave's temporary tables from memory.
    In the future there will be other actions than this, to ensure persistance
    of slave's temp tables after shutdown.
  */
  rli->close_temporary_tables();
3354
  DBUG_VOID_RETURN;
3355 3356
}

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
3357 3358
/*
  Try to connect until successful or slave killed
3359

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
3360 3361
  SYNPOSIS
    safe_connect()
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3362 3363 3364
    thd                 Thread handler for slave
    mysql               MySQL connection handle
    mi                  Replication handle
3365

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
3366
  RETURN
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3367 3368
    0   ok
    #   Error
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
3369
*/
3370

3371
static int safe_connect(THD* thd, MYSQL* mysql, Master_info* mi)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
3372
{
3373 3374 3375
  DBUG_ENTER("safe_connect");

  DBUG_RETURN(connect_to_master(thd, mysql, mi, 0, 0));
bk@work.mysql.com's avatar
bk@work.mysql.com committed
3376 3377
}

3378

3379
/*
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
3380 3381
  SYNPOSIS
    connect_to_master()
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3382

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
3383 3384 3385
  IMPLEMENTATION
    Try to connect until successful or slave killed or we have retried
    master_retry_count times
3386
*/
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3387

3388
static int connect_to_master(THD* thd, MYSQL* mysql, Master_info* mi,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3389
                             bool reconnect, bool suppress_warnings)
bk@work.mysql.com's avatar
bk@work.mysql.com committed
3390
{
3391
  int slave_was_killed;
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3392
  int last_errno= -2;                           // impossible error
3393
  ulong err_count=0;
3394
  char llbuff[22];
3395
  DBUG_ENTER("connect_to_master");
3396

3397
#ifndef DBUG_OFF
3398
  mi->events_till_disconnect = disconnect_slave_event_count;
3399
#endif
3400
  ulong client_flag= CLIENT_REMEMBER_OPTIONS;
3401
  if (opt_slave_compressed_protocol)
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3402
    client_flag=CLIENT_COMPRESS;                /* We will use compression */
3403

3404 3405
  mysql_options(mysql, MYSQL_OPT_CONNECT_TIMEOUT, (char *) &slave_net_timeout);
  mysql_options(mysql, MYSQL_OPT_READ_TIMEOUT, (char *) &slave_net_timeout);
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3406

3407 3408
#ifdef HAVE_OPENSSL
  if (mi->ssl)
3409
  {
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3410
    mysql_ssl_set(mysql,
3411
                  mi->ssl_key[0]?mi->ssl_key:0,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3412
                  mi->ssl_cert[0]?mi->ssl_cert:0,
3413 3414 3415
                  mi->ssl_ca[0]?mi->ssl_ca:0,
                  mi->ssl_capath[0]?mi->ssl_capath:0,
                  mi->ssl_cipher[0]?mi->ssl_cipher:0);
3416 3417 3418
    mysql_options(mysql, MYSQL_OPT_SSL_VERIFY_SERVER_CERT,
                  &mi->ssl_verify_server_cert);
  }
3419 3420
#endif

3421 3422 3423 3424
  mysql_options(mysql, MYSQL_SET_CHARSET_NAME, default_charset_info->csname);
  /* This one is not strictly needed but we have it here for completeness */
  mysql_options(mysql, MYSQL_SET_CHARSET_DIR, (char *) charsets_dir);

3425
  while (!(slave_was_killed = io_slave_killed(thd,mi)) &&
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3426 3427 3428
         (reconnect ? mysql_reconnect(mysql) != 0 :
          mysql_real_connect(mysql, mi->host, mi->user, mi->password, 0,
                             mi->port, 0, client_flag) == 0))
bk@work.mysql.com's avatar
bk@work.mysql.com committed
3429
  {
3430
    /* Don't repeat last error */
hf@deer.mysql.r18.ru's avatar
SCRUM  
hf@deer.mysql.r18.ru committed
3431
    if ((int)mysql_errno(mysql) != last_errno)
3432
    {
hf@deer.mysql.r18.ru's avatar
SCRUM  
hf@deer.mysql.r18.ru committed
3433
      last_errno=mysql_errno(mysql);
3434
      suppress_warnings= 0;
3435 3436 3437 3438 3439 3440
      mi->report(ERROR_LEVEL, last_errno,
                 "error %s to master '%s@%s:%d'"
                 " - retry-time: %d  retries: %lu",
                 (reconnect ? "reconnecting" : "connecting"),
                 mi->user, mi->host, mi->port,
                 mi->connect_retry, master_retry_count);
3441
    }
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3442 3443 3444
    /*
      By default we try forever. The reason is that failure will trigger
      master election, so if the user did not set master_retry_count we
3445
      do not want to have election triggered on the first failure to
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3446
      connect
3447
    */
3448
    if (++err_count == master_retry_count)
3449 3450
    {
      slave_was_killed=1;
sasha@mysql.sashanet.com's avatar
sasha@mysql.sashanet.com committed
3451 3452
      if (reconnect)
        change_rpl_status(RPL_ACTIVE_SLAVE,RPL_LOST_SOLDIER);
3453 3454
      break;
    }
3455
    safe_sleep(thd,mi->connect_retry,(CHECK_KILLED_FUNC)io_slave_killed,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3456
               (void*)mi);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
3457
  }
3458

3459 3460
  if (!slave_was_killed)
  {
sasha@mysql.sashanet.com's avatar
sasha@mysql.sashanet.com committed
3461
    if (reconnect)
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3462
    {
3463
      if (!suppress_warnings && global_system_variables.log_warnings)
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3464
        sql_print_information("Slave: connected to master '%s@%s:%d',\
3465
replication resumed in log '%s' at position %s", mi->user,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3466 3467 3468
                        mi->host, mi->port,
                        IO_RPL_LOG_NAME,
                        llstr(mi->master_log_pos,llbuff));
3469
    }
sasha@mysql.sashanet.com's avatar
sasha@mysql.sashanet.com committed
3470 3471 3472
    else
    {
      change_rpl_status(RPL_IDLE_SLAVE,RPL_ACTIVE_SLAVE);
3473 3474
      general_log_print(thd, COM_CONNECT_OUT, "%s@%s:%d",
                        mi->user, mi->host, mi->port);
sasha@mysql.sashanet.com's avatar
sasha@mysql.sashanet.com committed
3475
    }
3476
#ifdef SIGNAL_WITH_VIO_CLOSE
3477
    thd->set_active_vio(mysql->net.vio);
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3478
#endif
3479
  }
3480
  mysql->reconnect= 1;
3481 3482
  DBUG_PRINT("exit",("slave_was_killed: %d", slave_was_killed));
  DBUG_RETURN(slave_was_killed);
bk@work.mysql.com's avatar
bk@work.mysql.com committed
3483 3484
}

3485

sasha@mysql.sashanet.com's avatar
sasha@mysql.sashanet.com committed
3486
/*
3487
  safe_reconnect()
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3488

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
3489 3490 3491
  IMPLEMENTATION
    Try to connect until successful or slave killed or we have retried
    master_retry_count times
sasha@mysql.sashanet.com's avatar
sasha@mysql.sashanet.com committed
3492 3493
*/

3494
static int safe_reconnect(THD* thd, MYSQL* mysql, Master_info* mi,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3495
                          bool suppress_warnings)
sasha@mysql.sashanet.com's avatar
sasha@mysql.sashanet.com committed
3496
{
3497 3498
  DBUG_ENTER("safe_reconnect");
  DBUG_RETURN(connect_to_master(thd, mysql, mi, 1, suppress_warnings));
sasha@mysql.sashanet.com's avatar
sasha@mysql.sashanet.com committed
3499 3500
}

3501

3502 3503 3504 3505 3506 3507
/*
  Store the file and position where the execute-slave thread are in the
  relay log.

  SYNOPSIS
    flush_relay_log_info()
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3508
    rli                 Relay log information
3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526

  NOTES
    - As this is only called by the slave thread, we don't need to
      have a lock on this.
    - If there is an active transaction, then we don't update the position
      in the relay log.  This is to ensure that we re-execute statements
      if we die in the middle of an transaction that was rolled back.
    - As a transaction never spans binary logs, we don't have to handle the
      case where we do a relay-log-rotation in the middle of the transaction.
      If this would not be the case, we would have to ensure that we
      don't delete the relay log file where the transaction started when
      we switch to a new relay log file.

  TODO
    - Change the log file information to a binary format to avoid calling
      longlong2str.

  RETURN VALUES
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3527 3528
    0   ok
    1   write error
3529 3530
*/

3531
bool flush_relay_log_info(Relay_log_info* rli)
3532
{
3533
  bool error=0;
3534
  DBUG_ENTER("flush_relay_log_info");
3535 3536

  if (unlikely(rli->no_storage))
3537
    DBUG_RETURN(0);
3538

3539 3540 3541
  IO_CACHE *file = &rli->info_file;
  char buff[FN_REFLEN*2+22*2+4], *pos;

3542
  my_b_seek(file, 0L);
3543
  pos=strmov(buff, rli->group_relay_log_name);
3544
  *pos++='\n';
3545
  pos=longlong2str(rli->group_relay_log_pos, pos, 10);
3546
  *pos++='\n';
3547
  pos=strmov(pos, rli->group_master_log_name);
3548
  *pos++='\n';
3549
  pos=longlong2str(rli->group_master_log_pos, pos, 10);
3550
  *pos='\n';
3551
  if (my_b_write(file, (uchar*) buff, (size_t) (pos-buff)+1))
3552 3553 3554
    error=1;
  if (flush_io_cache(file))
    error=1;
3555

3556
  /* Flushing the relay log is done by the slave I/O thread */
3557
  DBUG_RETURN(error);
3558 3559
}

3560

3561
/*
3562
  Called when we notice that the current "hot" log got rotated under our feet.
3563 3564
*/

3565
static IO_CACHE *reopen_relay_log(Relay_log_info *rli, const char **errmsg)
3566
{
3567
  DBUG_ENTER("reopen_relay_log");
3568 3569
  DBUG_ASSERT(rli->cur_log != &rli->cache_buf);
  DBUG_ASSERT(rli->cur_log_fd == -1);
3570 3571

  IO_CACHE *cur_log = rli->cur_log=&rli->cache_buf;
3572
  if ((rli->cur_log_fd=open_binlog(cur_log,rli->event_relay_log_name,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3573
                                   errmsg)) <0)
3574
    DBUG_RETURN(0);
3575 3576
  /*
    We want to start exactly where we was before:
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3577 3578
    relay_log_pos       Current log pos
    pending             Number of bytes already processed from the event
3579
  */
3580
  rli->event_relay_log_pos= max(rli->event_relay_log_pos, BIN_LOG_HEADER_SIZE);
3581
  my_b_seek(cur_log,rli->event_relay_log_pos);
3582
  DBUG_RETURN(cur_log);
3583 3584
}

3585

3586
static Log_event* next_event(Relay_log_info* rli)
3587 3588 3589
{
  Log_event* ev;
  IO_CACHE* cur_log = rli->cur_log;
3590
  pthread_mutex_t *log_lock = rli->relay_log.get_log_lock();
3591 3592
  const char* errmsg=0;
  THD* thd = rli->sql_thd;
3593
  DBUG_ENTER("next_event");
3594

3595 3596
  DBUG_ASSERT(thd != 0);

3597 3598 3599 3600 3601
#ifndef DBUG_OFF
  if (abort_slave_event_count && !rli->events_till_abort--)
    DBUG_RETURN(0);
#endif

monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3602 3603
  /*
    For most operations we need to protect rli members with data_lock,
3604 3605 3606 3607
    so we assume calling function acquired this mutex for us and we will
    hold it for the most of the loop below However, we will release it
    whenever it is worth the hassle,  and in the cases when we go into a
    pthread_cond_wait() with the non-data_lock mutex
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3608
  */
3609
  safe_mutex_assert_owner(&rli->data_lock);
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3610

3611
  while (!sql_slave_killed(thd,rli))
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3612 3613 3614
  {
    /*
      We can have two kinds of log reading:
3615 3616 3617 3618 3619 3620 3621 3622
      hot_log:
        rli->cur_log points at the IO_CACHE of relay_log, which
        is actively being updated by the I/O thread. We need to be careful
        in this case and make sure that we are not looking at a stale log that
        has already been rotated. If it has been, we reopen the log.

      The other case is much simpler:
        We just have a read only log that nobody else will be updating.
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3623
    */
3624 3625 3626 3627 3628
    bool hot_log;
    if ((hot_log = (cur_log != &rli->cache_buf)))
    {
      DBUG_ASSERT(rli->cur_log_fd == -1); // foreign descriptor
      pthread_mutex_lock(log_lock);
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3629 3630

      /*
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3631 3632
        Reading xxx_file_id is safe because the log will only
        be rotated when we hold relay_log.LOCK_log
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3633
      */
3634
      if (rli->relay_log.get_open_count() != rli->cur_log_old_open_count)
3635
      {
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3636 3637 3638 3639 3640 3641
        // The master has switched to a new log file; Reopen the old log file
        cur_log=reopen_relay_log(rli, &errmsg);
        pthread_mutex_unlock(log_lock);
        if (!cur_log)                           // No more log files
          goto err;
        hot_log=0;                              // Using old binary log
3642 3643
      }
    }
3644 3645 3646 3647 3648 3649 3650
    /* 
      As there is no guarantee that the relay is open (for example, an I/O
      error during a write by the slave I/O thread may have closed it), we
      have to test it.
    */
    if (!my_b_inited(cur_log))
      goto err;
3651 3652
#ifndef DBUG_OFF
    {
3653
      /* This is an assertion which sometimes fails, let's try to track it */
3654
      char llbuf1[22], llbuf2[22];
3655
      DBUG_PRINT("info", ("my_b_tell(cur_log)=%s rli->event_relay_log_pos=%s",
3656
                          llstr(my_b_tell(cur_log),llbuf1),
3657 3658 3659
                          llstr(rli->event_relay_log_pos,llbuf2)));
      DBUG_ASSERT(my_b_tell(cur_log) >= BIN_LOG_HEADER_SIZE);
      DBUG_ASSERT(my_b_tell(cur_log) == rli->event_relay_log_pos);
3660 3661
    }
#endif
3662 3663
    /*
      Relay log is always in new format - if the master is 3.23, the
3664
      I/O thread will convert the format for us.
3665 3666
      A problem: the description event may be in a previous relay log. So if
      the slave has been shutdown meanwhile, we would have to look in old relay
3667 3668
      logs, which may even have been deleted. So we need to write this
      description event at the beginning of the relay log.
3669 3670
      When the relay log is created when the I/O thread starts, easy: the
      master will send the description event and we will queue it.
3671
      But if the relay log is created by new_file(): then the solution is:
3672
      MYSQL_BIN_LOG::open() will write the buffered description event.
3673
    */
3674 3675
    if ((ev=Log_event::read_log_event(cur_log,0,
                                      rli->relay_log.description_event_for_exec)))
3676

3677 3678
    {
      DBUG_ASSERT(thd==rli->sql_thd);
3679 3680 3681 3682 3683
      /*
        read it while we have a lock, to avoid a mutex lock in
        inc_event_relay_log_pos()
      */
      rli->future_event_relay_log_pos= my_b_tell(cur_log);
3684
      if (hot_log)
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3685
        pthread_mutex_unlock(log_lock);
3686
      DBUG_RETURN(ev);
3687 3688
    }
    DBUG_ASSERT(thd==rli->sql_thd);
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3689
    if (opt_reckless_slave)                     // For mysql-test
3690
      cur_log->error = 0;
3691
    if (cur_log->error < 0)
3692 3693
    {
      errmsg = "slave SQL thread aborted because of I/O error";
3694
      if (hot_log)
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3695
        pthread_mutex_unlock(log_lock);
3696 3697
      goto err;
    }
3698 3699
    if (!cur_log->error) /* EOF */
    {
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3700
      /*
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3701 3702 3703
        On a hot log, EOF means that there are no more updates to
        process and we must block until I/O thread adds some and
        signals us to continue
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3704
      */
3705 3706
      if (hot_log)
      {
3707 3708 3709 3710 3711 3712 3713 3714 3715 3716
        /*
          We say in Seconds_Behind_Master that we have "caught up". Note that
          for example if network link is broken but I/O slave thread hasn't
          noticed it (slave_net_timeout not elapsed), then we'll say "caught
          up" whereas we're not really caught up. Fixing that would require
          internally cutting timeout in smaller pieces in network read, no
          thanks. Another example: SQL has caught up on I/O, now I/O has read
          a new event and is queuing it; the false "0" will exist until SQL
          finishes executing the new event; it will be look abnormal only if
          the events have old timestamps (then you get "many", 0, "many").
3717 3718 3719 3720 3721 3722 3723 3724 3725 3726

          Transient phases like this can be fixed with implemeting
          Heartbeat event which provides the slave the status of the
          master at time the master does not have any new update to send.
          Seconds_Behind_Master would be zero only when master has no
          more updates in binlog for slave. The heartbeat can be sent
          in a (small) fraction of slave_net_timeout. Until it's done
          rli->last_master_timestamp is temporarely (for time of
          waiting for the following event) reset whenever EOF is
          reached.
3727
        */
3728
        time_t save_timestamp= rli->last_master_timestamp;
3729 3730
        rli->last_master_timestamp= 0;

grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3731
        DBUG_ASSERT(rli->relay_log.get_open_count() ==
3732
                    rli->cur_log_old_open_count);
3733 3734 3735 3736 3737

        if (rli->ign_master_log_name_end[0])
        {
          /* We generate and return a Rotate, to make our positions advance */
          DBUG_PRINT("info",("seeing an ignored end segment"));
3738
          ev= new Rotate_log_event(rli->ign_master_log_name_end,
3739
                                   0, rli->ign_master_log_pos_end,
3740
                                   Rotate_log_event::DUP_NAME);
3741
          rli->ign_master_log_name_end[0]= 0;
3742
          pthread_mutex_unlock(log_lock);
3743 3744 3745 3746 3747 3748 3749 3750 3751 3752
          if (unlikely(!ev))
          {
            errmsg= "Slave SQL thread failed to create a Rotate event "
              "(out of memory?), SHOW SLAVE STATUS may be inaccurate";
            goto err;
          }
          ev->server_id= 0; // don't be ignored by slave SQL thread
          DBUG_RETURN(ev);
        }

grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3753 3754 3755 3756 3757
        /*
          We can, and should release data_lock while we are waiting for
          update. If we do not, show slave status will block
        */
        pthread_mutex_unlock(&rli->data_lock);
3758 3759

        /*
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3760
          Possible deadlock :
3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771
          - the I/O thread has reached log_space_limit
          - the SQL thread has read all relay logs, but cannot purge for some
          reason:
            * it has already purged all logs except the current one
            * there are other logs than the current one but they're involved in
            a transaction that finishes in the current one (or is not finished)
          Solution :
          Wake up the possibly waiting I/O thread, and set a boolean asking
          the I/O thread to temporarily ignore the log_space_limit
          constraint, because we do not want the I/O thread to block because of
          space (it's ok if it blocks for any other reason (e.g. because the
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3772
          master does not send anything). Then the I/O thread stops waiting
3773 3774
          and reads more events.
          The SQL thread decides when the I/O thread should take log_space_limit
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3775
          into account again : ignore_log_space_limit is reset to 0
3776 3777 3778 3779
          in purge_first_log (when the SQL thread purges the just-read relay
          log), and also when the SQL thread starts. We should also reset
          ignore_log_space_limit to 0 when the user does RESET SLAVE, but in
          fact, no need as RESET SLAVE requires that the slave
guilhem@mysql.com's avatar
guilhem@mysql.com committed
3780 3781
          be stopped, and the SQL thread sets ignore_log_space_limit to 0 when
          it stops.
3782 3783 3784
        */
        pthread_mutex_lock(&rli->log_space_lock);
        // prevent the I/O thread from blocking next times
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3785
        rli->ignore_log_space_limit= 1;
3786
        /*
3787 3788 3789 3790
          If the I/O thread is blocked, unblock it.  Ok to broadcast
          after unlock, because the mutex is only destroyed in
          ~Relay_log_info(), i.e. when rli is destroyed, and rli will
          not be destroyed before we exit the present function.
3791
        */
3792
        pthread_mutex_unlock(&rli->log_space_lock);
3793
        pthread_cond_broadcast(&rli->log_space_cond);
3794
        // Note that wait_for_update unlocks lock_log !
3795
        rli->relay_log.wait_for_update(rli->sql_thd, 1);
3796 3797
        // re-acquire data lock since we released it earlier
        pthread_mutex_lock(&rli->data_lock);
3798
        rli->last_master_timestamp= save_timestamp;
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3799
        continue;
3800
      }
3801
      /*
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3802 3803 3804
        If the log was not hot, we need to move to the next log in
        sequence. The next log could be hot or cold, we deal with both
        cases separately after doing some common initialization
3805 3806 3807 3808 3809
      */
      end_io_cache(cur_log);
      DBUG_ASSERT(rli->cur_log_fd >= 0);
      my_close(rli->cur_log_fd, MYF(MY_WME));
      rli->cur_log_fd = -1;
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3810

3811
      if (relay_log_purge)
3812
      {
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3813
        /*
3814 3815 3816 3817 3818 3819 3820 3821 3822 3823
          purge_first_log will properly set up relay log coordinates in rli.
          If the group's coordinates are equal to the event's coordinates
          (i.e. the relay log was not rotated in the middle of a group),
          we can purge this relay log too.
          We do ulonglong and string comparisons, this may be slow but
          - purging the last relay log is nice (it can save 1GB of disk), so we
          like to detect the case where we can do it, and given this,
          - I see no better detection method
          - purge_first_log is not called that often
        */
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3824
        if (rli->relay_log.purge_first_log
3825 3826 3827
            (rli,
             rli->group_relay_log_pos == rli->event_relay_log_pos
             && !strcmp(rli->group_relay_log_name,rli->event_relay_log_name)))
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3828 3829 3830 3831
        {
          errmsg = "Error purging processed logs";
          goto err;
        }
3832
      }
3833 3834
      else
      {
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850
        /*
          If hot_log is set, then we already have a lock on
          LOCK_log.  If not, we have to get the lock.

          According to Sasha, the only time this code will ever be executed
          is if we are recovering from a bug.
        */
        if (rli->relay_log.find_next_log(&rli->linfo, !hot_log))
        {
          errmsg = "error switching to the next log";
          goto err;
        }
        rli->event_relay_log_pos = BIN_LOG_HEADER_SIZE;
        strmake(rli->event_relay_log_name,rli->linfo.log_file_name,
                sizeof(rli->event_relay_log_name)-1);
        flush_relay_log_info(rli);
3851
      }
3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865

      /*
        Now we want to open this next log. To know if it's a hot log (the one
        being written by the I/O thread now) or a cold log, we can use
        is_active(); if it is hot, we use the I/O cache; if it's cold we open
        the file normally. But if is_active() reports that the log is hot, this
        may change between the test and the consequence of the test. So we may
        open the I/O cache whereas the log is now cold, which is nonsense.
        To guard against this, we need to have LOCK_log.
      */

      DBUG_PRINT("info",("hot_log: %d",hot_log));
      if (!hot_log) /* if hot_log, we already have this mutex */
        pthread_mutex_lock(log_lock);
3866 3867
      if (rli->relay_log.is_active(rli->linfo.log_file_name))
      {
3868
#ifdef EXTRA_DEBUG
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3869 3870
        if (global_system_variables.log_warnings)
          sql_print_information("next log '%s' is currently active",
3871
                                rli->linfo.log_file_name);
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3872 3873 3874 3875 3876 3877 3878 3879
#endif
        rli->cur_log= cur_log= rli->relay_log.get_log_file();
        rli->cur_log_old_open_count= rli->relay_log.get_open_count();
        DBUG_ASSERT(rli->cur_log_fd == -1);

        /*
          Read pointer has to be at the start since we are the only
          reader.
3880 3881 3882
          We must keep the LOCK_log to read the 4 first bytes, as this is a hot
          log (same as when we call read_log_event() above: for a hot log we
          take the mutex).
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3883 3884
        */
        if (check_binlog_magic(cur_log,&errmsg))
3885 3886
        {
          if (!hot_log) pthread_mutex_unlock(log_lock);
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3887
          goto err;
3888 3889
        }
        if (!hot_log) pthread_mutex_unlock(log_lock);
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3890
        continue;
3891
      }
3892
      if (!hot_log) pthread_mutex_unlock(log_lock);
3893
      /*
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3894 3895 3896
        if we get here, the log was not hot, so we will have to open it
        ourselves. We are sure that the log is still not hot now (a log can get
        from hot to cold, but not from cold to hot). No need for LOCK_log.
3897 3898
      */
#ifdef EXTRA_DEBUG
3899
      if (global_system_variables.log_warnings)
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3900
        sql_print_information("next log '%s' is not active",
3901
                              rli->linfo.log_file_name);
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3902
#endif
3903 3904
      // open_binlog() will check the magic header
      if ((rli->cur_log_fd=open_binlog(cur_log,rli->linfo.log_file_name,
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3905 3906
                                       &errmsg)) <0)
        goto err;
3907
    }
3908
    else
3909
    {
3910
      /*
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3911 3912
        Read failed with a non-EOF error.
        TODO: come up with something better to handle this error
3913 3914
      */
      if (hot_log)
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3915
        pthread_mutex_unlock(log_lock);
3916
      sql_print_error("Slave SQL thread: I/O error reading \
3917
event(errno: %d  cur_log->error: %d)",
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3918
                      my_errno,cur_log->error);
3919
      // set read position to the beginning of the event
3920
      my_b_seek(cur_log,rli->event_relay_log_pos);
3921 3922
      /* otherwise, we have had a partial read */
      errmsg = "Aborting slave SQL thread because of partial event read";
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3923
      break;                                    // To end of function
3924 3925
    }
  }
3926
  if (!errmsg && global_system_variables.log_warnings)
3927
  {
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3928
    sql_print_information("Error reading relay log event: %s",
3929 3930 3931
                          "slave SQL thread was killed");
    DBUG_RETURN(0);
  }
3932

3933
err:
3934 3935
  if (errmsg)
    sql_print_error("Error reading relay log event: %s", errmsg);
3936
  DBUG_RETURN(0);
3937 3938
}

3939 3940 3941
/*
  Rotate a relay log (this is used only by FLUSH LOGS; the automatic rotation
  because of size is simpler because when we do it we already have all relevant
grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3942
  locks; here we don't, so this function is mainly taking locks).
3943 3944
  Returns nothing as we cannot catch any error (MYSQL_BIN_LOG::new_file()
  is void).
3945 3946
*/

3947
void rotate_relay_log(Master_info* mi)
3948 3949
{
  DBUG_ENTER("rotate_relay_log");
3950
  Relay_log_info* rli= &mi->rli;
guilhem@mysql.com's avatar
guilhem@mysql.com committed
3951

3952 3953 3954
  /* We don't lock rli->run_lock. This would lead to deadlocks. */
  pthread_mutex_lock(&mi->run_lock);

grog@eucla.lemis.com's avatar
grog@eucla.lemis.com committed
3955
  /*
guilhem@mysql.com's avatar
guilhem@mysql.com committed
3956 3957 3958
     We need to test inited because otherwise, new_file() will attempt to lock
     LOCK_log, which may not be inited (if we're not a slave).
  */
3959 3960
  if (!rli->inited)
  {
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
3961
    DBUG_PRINT("info", ("rli->inited == 0"));
guilhem@mysql.com's avatar
guilhem@mysql.com committed
3962
    goto end;
3963
  }
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
3964

3965
  /* If the relay log is closed, new_file() will do nothing. */
3966
  rli->relay_log.new_file();
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
3967

3968 3969 3970 3971
  /*
    We harvest now, because otherwise BIN_LOG_HEADER_SIZE will not immediately
    be counted, so imagine a succession of FLUSH LOGS  and assume the slave
    threads are started:
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
3972 3973 3974 3975 3976 3977
    relay_log_space decreases by the size of the deleted relay log, but does
    not increase, so flush-after-flush we may become negative, which is wrong.
    Even if this will be corrected as soon as a query is replicated on the
    slave (because the I/O thread will then call harvest_bytes_written() which
    will harvest all these BIN_LOG_HEADER_SIZE we forgot), it may give strange
    output in SHOW SLAVE STATUS meanwhile. So we harvest now.
3978 3979 3980 3981
    If the log is closed, then this will just harvest the last writes, probably
    0 as they probably have been harvested.
  */
  rli->relay_log.harvest_bytes_written(&rli->log_space_total);
guilhem@mysql.com's avatar
guilhem@mysql.com committed
3982
end:
3983
  pthread_mutex_unlock(&mi->run_lock);
3984 3985 3986
  DBUG_VOID_RETURN;
}

sasha@mysql.sashanet.com's avatar
sasha@mysql.sashanet.com committed
3987

guilhem@gbichot3.local's avatar
guilhem@gbichot3.local committed
3988 3989 3990
/**
   Detects, based on master's version (as found in the relay log), if master
   has a certain bug.
3991
   @param rli Relay_log_info which tells the master's version
guilhem@gbichot3.local's avatar
guilhem@gbichot3.local committed
3992 3993 3994
   @param bug_id Number of the bug as found in bugs.mysql.com
   @return TRUE if master has the bug, FALSE if it does not.
*/
3995
bool rpl_master_has_bug(Relay_log_info *rli, uint bug_id)
guilhem@gbichot3.local's avatar
guilhem@gbichot3.local committed
3996 3997 3998 3999 4000 4001 4002 4003
{
  struct st_version_range_for_one_bug {
    uint        bug_id;
    const uchar introduced_in[3]; // first version with bug
    const uchar fixed_in[3];      // first version with fix
  };
  static struct st_version_range_for_one_bug versions_for_all_bugs[]=
  {
4004 4005
    {24432, { 5, 0, 24 }, { 5, 0, 38 } },
    {24432, { 5, 1, 12 }, { 5, 1, 17 } }
guilhem@gbichot3.local's avatar
guilhem@gbichot3.local committed
4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025
  };
  const uchar *master_ver=
    rli->relay_log.description_event_for_exec->server_version_split;

  DBUG_ASSERT(sizeof(rli->relay_log.description_event_for_exec->server_version_split) == 3);

  for (uint i= 0;
       i < sizeof(versions_for_all_bugs)/sizeof(*versions_for_all_bugs);i++)
  {
    const uchar *introduced_in= versions_for_all_bugs[i].introduced_in,
      *fixed_in= versions_for_all_bugs[i].fixed_in;
    if ((versions_for_all_bugs[i].bug_id == bug_id) &&
        (memcmp(introduced_in, master_ver, 3) <= 0) &&
        (memcmp(fixed_in,      master_ver, 3) >  0))
    {
      // a short message for SHOW SLAVE STATUS (message length constraints)
      my_printf_error(ER_UNKNOWN_ERROR, "master may suffer from"
                      " http://bugs.mysql.com/bug.php?id=%u"
                      " so slave stops; check error log on slave"
                      " for more info", MYF(0), bug_id);
4026
      // a verbose message for the error log
4027 4028 4029
      rli->report(ERROR_LEVEL, ER_UNKNOWN_ERROR,
                  "According to the master's version ('%s'),"
                  " it is probable that master suffers from this bug:"
4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045
                      " http://bugs.mysql.com/bug.php?id=%u"
                      " and thus replicating the current binary log event"
                      " may make the slave's data become different from the"
                      " master's data."
                      " To take no risk, slave refuses to replicate"
                      " this event and stops."
                      " We recommend that all updates be stopped on the"
                      " master and slave, that the data of both be"
                      " manually synchronized,"
                      " that master's binary logs be deleted,"
                      " that master be upgraded to a version at least"
                      " equal to '%d.%d.%d'. Then replication can be"
                      " restarted.",
                      rli->relay_log.description_event_for_exec->server_version,
                      bug_id,
                      fixed_in[0], fixed_in[1], fixed_in[2]);
guilhem@gbichot3.local's avatar
guilhem@gbichot3.local committed
4046 4047 4048 4049 4050 4051
      return TRUE;
    }
  }
  return FALSE;
}

4052
#ifdef HAVE_EXPLICIT_TEMPLATE_INSTANTIATION
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4053
template class I_List_iterator<i_string>;
sasha@mysql.sashanet.com's avatar
sasha@mysql.sashanet.com committed
4054
template class I_List_iterator<i_string_pair>;
bk@work.mysql.com's avatar
bk@work.mysql.com committed
4055
#endif
4056

4057 4058 4059 4060
/**
  @} (end of group Replication)
*/

hf@deer.mysql.r18.ru's avatar
SCRUM  
hf@deer.mysql.r18.ru committed
4061
#endif /* HAVE_REPLICATION */