Commit dc9c5554 authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-16367 mariabackup: error: failed to copy enough redo log

log_copying_thread(): Keep copying redo log until the end has been
reached. (Previously, we would stop copying as soon as
the first batch of xtrabackup_copy_logfile() returned.)

log_copying: Remove. Use log_copying_running instead.

copy_logfile: Remove. Log copying will now only be invoked from
2 places: from xtrabackup_backup_func() for the initial batch,
and from log_copying_thread() until all of the log has been read.
Use the global variable metadata_to_lsn for determining if the
final part of the log is being copied.

xtrabackup_copy_log(): Add diagnostic messages for terminating
the copying. These messages should be dead code, because
log_group_read_log_seg() should be checking for the same.

xtrabackup_copy_logfile(): Correct the retrying logic.
If anything was successfully read, process the portion that
was read. On failure, let the caller close dst_log_file.

io_watching_thread(): Stop throttling during the last phase
of copying the log (metadata_to_lsn!=0). The final copying
of the log will now be performed in log_copying_thread().

stop_backup_threads(): Clean up the message about stopping
the log copying thread.

xtrabackup_backup_low(): Read metadata_to_lsn from the latest
checkpoint header page, even if it is the first page.
Let the log_copying_thread take care of copying all of
the redo log.
parent 619c277a
...@@ -172,7 +172,6 @@ typedef struct xb_filter_entry_struct xb_filter_entry_t; ...@@ -172,7 +172,6 @@ typedef struct xb_filter_entry_struct xb_filter_entry_t;
lsn_t checkpoint_lsn_start; lsn_t checkpoint_lsn_start;
lsn_t checkpoint_no_start; lsn_t checkpoint_no_start;
static lsn_t log_copy_scanned_lsn; static lsn_t log_copy_scanned_lsn;
static bool log_copying;
static bool log_copying_running; static bool log_copying_running;
static bool io_watching_thread_running; static bool io_watching_thread_running;
...@@ -2452,25 +2451,12 @@ xtrabackup_copy_datafile(fil_node_t* node, uint thread_n) ...@@ -2452,25 +2451,12 @@ xtrabackup_copy_datafile(fil_node_t* node, uint thread_n)
return(FALSE); return(FALSE);
} }
/** How to copy a redo log segment in backup */
enum copy_logfile {
/** Initial copying: copy at least one block */
COPY_FIRST,
/** Tracking while copying data files */
COPY_ONLINE,
/** Final copying: copy until the end of the log */
COPY_LAST
};
/** Copy redo log blocks to the data sink. /** Copy redo log blocks to the data sink.
@param[in] copy how to copy the log @param start_lsn buffer start LSN
@param[in] start_lsn buffer start LSN @param end_lsn buffer end LSN
@param[in] end_lsn buffer end LSN @return last scanned LSN
@return last scanned LSN (equals to last copied LSN if copy=COPY_LAST)
@retval 0 on failure */ @retval 0 on failure */
static static lsn_t xtrabackup_copy_log(lsn_t start_lsn, lsn_t end_lsn)
lsn_t
xtrabackup_copy_log(copy_logfile copy, lsn_t start_lsn, lsn_t end_lsn)
{ {
lsn_t scanned_lsn = start_lsn; lsn_t scanned_lsn = start_lsn;
const byte* log_block = log_sys->buf; const byte* log_block = log_sys->buf;
...@@ -2485,6 +2471,9 @@ xtrabackup_copy_log(copy_logfile copy, lsn_t start_lsn, lsn_t end_lsn) ...@@ -2485,6 +2471,9 @@ xtrabackup_copy_log(copy_logfile copy, lsn_t start_lsn, lsn_t end_lsn)
&& scanned_checkpoint - checkpoint >= 0x80000000UL) { && scanned_checkpoint - checkpoint >= 0x80000000UL) {
/* Garbage from a log buffer flush which was made /* Garbage from a log buffer flush which was made
before the most recent database recovery */ before the most recent database recovery */
msg("mariabackup: checkpoint wrap: "
LSN_PF ",%zx,%zx\n",
scanned_lsn, scanned_checkpoint, checkpoint);
break; break;
} }
...@@ -2505,6 +2494,8 @@ xtrabackup_copy_log(copy_logfile copy, lsn_t start_lsn, lsn_t end_lsn) ...@@ -2505,6 +2494,8 @@ xtrabackup_copy_log(copy_logfile copy, lsn_t start_lsn, lsn_t end_lsn)
>= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE >= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE
|| data_len <= LOG_BLOCK_HDR_SIZE) { || data_len <= LOG_BLOCK_HDR_SIZE) {
/* We got a garbage block (abrupt end of the log). */ /* We got a garbage block (abrupt end of the log). */
msg("mariabackup: garbage block: " LSN_PF ",%zu\n",
scanned_lsn, data_len);
break; break;
} else { } else {
/* We got a partial block (abrupt end of the log). */ /* We got a partial block (abrupt end of the log). */
...@@ -2524,7 +2515,7 @@ xtrabackup_copy_log(copy_logfile copy, lsn_t start_lsn, lsn_t end_lsn) ...@@ -2524,7 +2515,7 @@ xtrabackup_copy_log(copy_logfile copy, lsn_t start_lsn, lsn_t end_lsn)
log_sys->log.scanned_lsn = scanned_lsn; log_sys->log.scanned_lsn = scanned_lsn;
end_lsn = copy == COPY_LAST end_lsn = metadata_to_lsn
? ut_uint64_align_up(scanned_lsn, OS_FILE_LOG_BLOCK_SIZE) ? ut_uint64_align_up(scanned_lsn, OS_FILE_LOG_BLOCK_SIZE)
: scanned_lsn & ~lsn_t(OS_FILE_LOG_BLOCK_SIZE - 1); : scanned_lsn & ~lsn_t(OS_FILE_LOG_BLOCK_SIZE - 1);
...@@ -2544,10 +2535,8 @@ xtrabackup_copy_log(copy_logfile copy, lsn_t start_lsn, lsn_t end_lsn) ...@@ -2544,10 +2535,8 @@ xtrabackup_copy_log(copy_logfile copy, lsn_t start_lsn, lsn_t end_lsn)
} }
/** Copy redo log until the current end of the log is reached /** Copy redo log until the current end of the log is reached
@param copy how to copy the log
@return whether the operation failed */ @return whether the operation failed */
static bool static bool xtrabackup_copy_logfile()
xtrabackup_copy_logfile(copy_logfile copy)
{ {
ut_a(dst_log_file != NULL); ut_a(dst_log_file != NULL);
ut_ad(recv_sys != NULL); ut_ad(recv_sys != NULL);
...@@ -2560,33 +2549,29 @@ xtrabackup_copy_logfile(copy_logfile copy) ...@@ -2560,33 +2549,29 @@ xtrabackup_copy_logfile(copy_logfile copy)
start_lsn = ut_uint64_align_down(log_copy_scanned_lsn, start_lsn = ut_uint64_align_down(log_copy_scanned_lsn,
OS_FILE_LOG_BLOCK_SIZE); OS_FILE_LOG_BLOCK_SIZE);
/* When copying the first or last part of the log, retry a few
times to ensure that all log up to the last checkpoint will be
read. */
do { do {
end_lsn = start_lsn + RECV_SCAN_SIZE; end_lsn = start_lsn + RECV_SCAN_SIZE;
xtrabackup_io_throttling(); xtrabackup_io_throttling();
log_mutex_enter(); log_mutex_enter();
lsn_t lsn= start_lsn; lsn_t lsn= start_lsn;
for(int retries= 0; retries < 100; retries++) { for (int retries= 0; retries < 100; retries++) {
if (log_group_read_log_seg(log_sys->buf, &log_sys->log, if (log_group_read_log_seg(log_sys->buf, &log_sys->log,
&lsn, end_lsn)){ &lsn, end_lsn)
|| lsn != start_lsn) {
break; break;
} }
msg("Retrying read of a redo log block"); msg("Retrying read of log at LSN=" LSN_PF "\n", lsn);
my_sleep(1000); my_sleep(1000);
} }
start_lsn = xtrabackup_copy_log(copy, start_lsn, lsn); start_lsn = (lsn == start_lsn)
? 0 : xtrabackup_copy_log(start_lsn, lsn);
log_mutex_exit(); log_mutex_exit();
if (!start_lsn) { if (!start_lsn) {
ds_close(dst_log_file);
dst_log_file = NULL;
msg("mariabackup: Error: xtrabackup_copy_logfile()" msg("mariabackup: Error: xtrabackup_copy_logfile()"
" failed.\n"); " failed.\n");
return(true); return(true);
...@@ -2612,12 +2597,23 @@ static os_thread_ret_t log_copying_thread(void*) ...@@ -2612,12 +2597,23 @@ static os_thread_ret_t log_copying_thread(void*)
*/ */
my_thread_init(); my_thread_init();
do { for (;;) {
os_event_reset(log_copying_stop); os_event_reset(log_copying_stop);
os_event_wait_time_low(log_copying_stop, os_event_wait_time_low(log_copying_stop,
xtrabackup_log_copy_interval * 1000ULL, xtrabackup_log_copy_interval * 1000ULL,
0); 0);
} while (log_copying && xtrabackup_copy_logfile(COPY_ONLINE)); if (xtrabackup_copy_logfile()) {
break;
}
log_mutex_enter();
bool completed = metadata_to_lsn
&& metadata_to_lsn < log_copy_scanned_lsn;
log_mutex_exit();
if (completed) {
break;
}
}
log_copying_running = false; log_copying_running = false;
my_thread_end(); my_thread_end();
...@@ -2632,7 +2628,7 @@ static os_thread_ret_t io_watching_thread(void*) ...@@ -2632,7 +2628,7 @@ static os_thread_ret_t io_watching_thread(void*)
/* currently, for --backup only */ /* currently, for --backup only */
ut_a(xtrabackup_backup); ut_a(xtrabackup_backup);
while (log_copying) { while (log_copying_running && !metadata_to_lsn) {
os_thread_sleep(1000000); /*1 sec*/ os_thread_sleep(1000000); /*1 sec*/
io_ticket = xtrabackup_throttle; io_ticket = xtrabackup_throttle;
os_event_set(wait_throttle); os_event_set(wait_throttle);
...@@ -3662,16 +3658,16 @@ xb_set_max_open_files( ...@@ -3662,16 +3658,16 @@ xb_set_max_open_files(
static void stop_backup_threads() static void stop_backup_threads()
{ {
log_copying = false; if (log_copying_stop && log_copying_running) {
if (log_copying_stop) {
os_event_set(log_copying_stop); os_event_set(log_copying_stop);
msg("mariabackup: Stopping log copying thread.\n"); fputs("mariabackup: Stopping log copying thread", stderr);
fflush(stderr);
while (log_copying_running) { while (log_copying_running) {
msg("."); putc('.', stderr);
fflush(stderr);
os_thread_sleep(200000); /*0.2 sec*/ os_thread_sleep(200000); /*0.2 sec*/
} }
msg("\n"); putc('\n', stderr);
os_event_destroy(log_copying_stop); os_event_destroy(log_copying_stop);
} }
...@@ -3686,10 +3682,10 @@ static void stop_backup_threads() ...@@ -3686,10 +3682,10 @@ static void stop_backup_threads()
/** Implement the core of --backup /** Implement the core of --backup
@return whether the operation succeeded */ @return whether the operation succeeded */
static static bool xtrabackup_backup_low()
bool
xtrabackup_backup_low()
{ {
ut_ad(!metadata_to_lsn);
/* read the latest checkpoint lsn */ /* read the latest checkpoint lsn */
{ {
ulint max_cp_field; ulint max_cp_field;
...@@ -3698,13 +3694,16 @@ xtrabackup_backup_low() ...@@ -3698,13 +3694,16 @@ xtrabackup_backup_low()
if (recv_find_max_checkpoint(&max_cp_field) == DB_SUCCESS if (recv_find_max_checkpoint(&max_cp_field) == DB_SUCCESS
&& log_sys->log.format != 0) { && log_sys->log.format != 0) {
if (max_cp_field == LOG_CHECKPOINT_1) {
log_group_header_read(&log_sys->log,
max_cp_field);
}
metadata_to_lsn = mach_read_from_8( metadata_to_lsn = mach_read_from_8(
log_sys->checkpoint_buf + LOG_CHECKPOINT_LSN); log_sys->checkpoint_buf + LOG_CHECKPOINT_LSN);
msg("mariabackup: The latest check point" msg("mariabackup: The latest check point"
" (for incremental): '" LSN_PF "'\n", " (for incremental): '" LSN_PF "'\n",
metadata_to_lsn); metadata_to_lsn);
} else { } else {
metadata_to_lsn = 0;
msg("mariabackup: Error: recv_find_max_checkpoint() failed.\n"); msg("mariabackup: Error: recv_find_max_checkpoint() failed.\n");
} }
log_mutex_exit(); log_mutex_exit();
...@@ -3712,11 +3711,7 @@ xtrabackup_backup_low() ...@@ -3712,11 +3711,7 @@ xtrabackup_backup_low()
stop_backup_threads(); stop_backup_threads();
if (!dst_log_file || xtrabackup_copy_logfile(COPY_LAST)) { if (ds_close(dst_log_file) || !metadata_to_lsn) {
return false;
}
if (ds_close(dst_log_file)) {
dst_log_file = NULL; dst_log_file = NULL;
return false; return false;
} }
...@@ -3795,6 +3790,7 @@ xtrabackup_backup_func() ...@@ -3795,6 +3790,7 @@ xtrabackup_backup_func()
srv_read_only_mode = TRUE; srv_read_only_mode = TRUE;
srv_operation = SRV_OPERATION_BACKUP; srv_operation = SRV_OPERATION_BACKUP;
metadata_to_lsn = 0;
if (xb_close_files) if (xb_close_files)
msg("mariabackup: warning: close-files specified. Use it " msg("mariabackup: warning: close-files specified. Use it "
...@@ -3805,7 +3801,12 @@ xtrabackup_backup_func() ...@@ -3805,7 +3801,12 @@ xtrabackup_backup_func()
/* initialize components */ /* initialize components */
if(innodb_init_param()) { if(innodb_init_param()) {
fail: fail:
metadata_to_lsn = log_copying_running;
stop_backup_threads(); stop_backup_threads();
if (dst_log_file) {
ds_close(dst_log_file);
dst_log_file = NULL;
}
if (fil_system) { if (fil_system) {
innodb_shutdown(); innodb_shutdown();
} }
...@@ -3814,7 +3815,6 @@ xtrabackup_backup_func() ...@@ -3814,7 +3815,6 @@ xtrabackup_backup_func()
xb_normalize_init_values(); xb_normalize_init_values();
if (srv_file_flush_method_str == NULL) { if (srv_file_flush_method_str == NULL) {
/* These are the default options */ /* These are the default options */
srv_file_flush_method = SRV_FSYNC; srv_file_flush_method = SRV_FSYNC;
...@@ -4058,9 +4058,7 @@ xtrabackup_backup_func() ...@@ -4058,9 +4058,7 @@ xtrabackup_backup_func()
goto log_write_fail; goto log_write_fail;
} }
/* start flag */ log_copying_running = true;
log_copying = TRUE;
/* start io throttle */ /* start io throttle */
if(xtrabackup_throttle) { if(xtrabackup_throttle) {
os_thread_id_t io_watching_thread_id; os_thread_id_t io_watching_thread_id;
...@@ -4078,6 +4076,8 @@ xtrabackup_backup_func() ...@@ -4078,6 +4076,8 @@ xtrabackup_backup_func()
if (err != DB_SUCCESS) { if (err != DB_SUCCESS) {
msg("mariabackup: error: xb_load_tablespaces() failed with" msg("mariabackup: error: xb_load_tablespaces() failed with"
" error %s.\n", ut_strerr(err)); " error %s.\n", ut_strerr(err));
fail_before_log_copying_thread_start:
log_copying_running = false;
goto fail; goto fail;
} }
...@@ -4085,11 +4085,10 @@ xtrabackup_backup_func() ...@@ -4085,11 +4085,10 @@ xtrabackup_backup_func()
log_copy_scanned_lsn = checkpoint_lsn_start; log_copy_scanned_lsn = checkpoint_lsn_start;
recv_sys->recovered_lsn = log_copy_scanned_lsn; recv_sys->recovered_lsn = log_copy_scanned_lsn;
if (xtrabackup_copy_logfile(COPY_FIRST)) if (xtrabackup_copy_logfile())
goto fail; goto fail_before_log_copying_thread_start;
log_copying_stop = os_event_create(0); log_copying_stop = os_event_create(0);
log_copying_running = true;
os_thread_create(log_copying_thread, NULL, &log_copying_thread_id); os_thread_create(log_copying_thread, NULL, &log_copying_thread_id);
/* FLUSH CHANGED_PAGE_BITMAPS call */ /* FLUSH CHANGED_PAGE_BITMAPS call */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment