Commit 6bbe24e9 authored by Marko Mäkelä's avatar Marko Mäkelä

Bug#14636528 INNODB CHANGE BUFFERING IS NOT ENTIRELY CRASH-SAFE

Delete-mark change buffer records when resorting to a pessimistic
delete from the change buffer B-tree. Skip delete-marked records in
the change buffer merge and when estimating whether an operation can
be buffered. Without this fix, we could try to apply the same buffered
changes multiple times if the server was killed at the right moment.

In MySQL 5.5 and later: ibuf_get_volume_buffered_count_func(): Ignore
delete-marked (already processed) records.

ibuf_delete_rec(): Add a crash point before optimistic delete. If the
optimistic delete fails, flag the record processed before
mtr_commit().

ibuf_merge_or_delete_for_page(): Ignore delete-marked (already
processed) records.

Backport to 5.1: Rename btr_cur_del_unmark_for_ibuf() to
btr_cur_set_deleted_flag_for_ibuf() and add a parameter.

rb:1307 approved by Jimmy Yang
parent a5f36f4c
......@@ -2377,21 +2377,22 @@ btr_cur_del_mark_set_sec_rec(
}
/***************************************************************
Sets a secondary index record delete mark to FALSE. This function is only
Sets a secondary index record delete mark. This function is only
used by the insert buffer insert merge mechanism. */
void
btr_cur_del_unmark_for_ibuf(
/*========================*/
btr_cur_set_deleted_flag_for_ibuf(
/*==============================*/
rec_t* rec, /* in: record to delete unmark */
ibool val, /* in: value to set */
mtr_t* mtr) /* in: mtr */
{
/* We do not need to reserve btr_search_latch, as the page has just
been read to the buffer pool and there cannot be a hash index to it. */
rec_set_deleted_flag(rec, page_is_comp(buf_frame_align(rec)), FALSE);
rec_set_deleted_flag(rec, page_is_comp(buf_frame_align(rec)), val);
btr_cur_del_mark_set_sec_rec_log(rec, FALSE, mtr);
btr_cur_del_mark_set_sec_rec_log(rec, val, mtr);
}
/*==================== B-TREE RECORD REMOVE =========================*/
......
......@@ -9265,8 +9265,8 @@ static MYSQL_SYSVAR_ENUM(stats_method, srv_innodb_stats_method,
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug,
PLUGIN_VAR_RQCMDARG,
"Debug flags for InnoDB change buffering (0=none)",
NULL, NULL, 0, 0, 1, 0);
"Debug flags for InnoDB change buffering (0=none, 2=crash at merge)",
NULL, NULL, 0, 0, 2, 0);
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
#ifdef UNIV_DEBUG
......
......@@ -2978,7 +2978,7 @@ ibuf_insert_to_index_page(
/* The records only differ in the delete-mark.
Clear the delete-mark, like we did before
Bug #56680 was fixed. */
btr_cur_del_unmark_for_ibuf(rec, mtr);
btr_cur_set_deleted_flag_for_ibuf(rec, FALSE, mtr);
updated_in_place:
mem_heap_free(heap);
return;
......@@ -3058,6 +3058,22 @@ ibuf_delete_rec(
ut_ad(ibuf_inside());
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
if (ibuf_debug == 2) {
/* Inject a fault (crash). We do this before trying
optimistic delete, because a pessimistic delete in the
change buffer would require a larger test case. */
/* Flag the buffered record as processed, to avoid
an assertion failure after crash recovery. */
btr_cur_set_deleted_flag_for_ibuf(
btr_pcur_get_rec(pcur), TRUE, mtr);
mtr_commit(mtr);
log_make_checkpoint_at(ut_dulint_max, TRUE);
DBUG_SUICIDE();
}
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur), mtr);
if (success) {
......@@ -3072,7 +3088,13 @@ ibuf_delete_rec(
return(FALSE);
}
/* We have to resort to a pessimistic delete from ibuf */
/* We have to resort to a pessimistic delete from ibuf.
Delete-mark the record so that it will not be applied again,
in case the server crashes before the pessimistic delete is
made persistent. */
btr_cur_set_deleted_flag_for_ibuf(
btr_pcur_get_rec(pcur), TRUE, mtr);
btr_pcur_store_position(pcur, mtr);
btr_pcur_commit_specify_mtr(pcur, mtr);
......@@ -3343,7 +3365,7 @@ ibuf_merge_or_delete_for_page(
fputs("InnoDB: Discarding record\n ", stderr);
rec_print_old(stderr, ibuf_rec);
fputs("\n from the insert buffer!\n\n", stderr);
} else if (page) {
} else if (page && !rec_get_deleted_flag(ibuf_rec, 0)) {
/* Now we have at pcur a record which should be
inserted to the index page; NOTE that the call below
copies pointers to fields in ibuf_rec, and we must
......
......@@ -277,13 +277,14 @@ btr_cur_del_mark_set_sec_rec(
que_thr_t* thr, /* in: query thread */
mtr_t* mtr); /* in: mtr */
/***************************************************************
Sets a secondary index record delete mark to FALSE. This function is
only used by the insert buffer insert merge mechanism. */
Sets a secondary index record delete mark. This function is only
used by the insert buffer insert merge mechanism. */
void
btr_cur_del_unmark_for_ibuf(
/*========================*/
btr_cur_set_deleted_flag_for_ibuf(
/*==============================*/
rec_t* rec, /* in: record to delete unmark */
ibool val, /* in: value to set */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Tries to compress a page of the tree on the leaf level. It is assumed
......
2012-09-18 The InnoDB Team
* btr/btr0cur.c, handler/ha_innodb.cc, ibuf/ibuf0ibuf.c,
include/btr0cur.h:
Fix Bug#14636528 INNODB CHANGE BUFFERING IS NOT ENTIRELY CRASH-SAFE
2012-09-17 The InnoDB Team
* btr/btr0btr.c, btr/btr0cur.c, buf/buf0lru.c,
......
......@@ -2769,19 +2769,20 @@ btr_cur_del_mark_set_sec_rec(
return(DB_SUCCESS);
}
/***********************************************************//**
Clear a secondary index record's delete mark. This function is only
/***************************************************************
Sets a secondary index record delete mark. This function is only
used by the insert buffer insert merge mechanism. */
UNIV_INTERN
void
btr_cur_del_unmark_for_ibuf(
/*========================*/
btr_cur_set_deleted_flag_for_ibuf(
/*==============================*/
rec_t* rec, /*!< in/out: record to delete unmark */
page_zip_des_t* page_zip, /*!< in/out: compressed page
corresponding to rec, or NULL
when the tablespace is
uncompressed */
mtr_t* mtr) /*!< in: mtr */
ibool val, /*!< in: value to set */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
/* We do not need to reserve btr_search_latch, as the page
has just been read to the buffer pool and there cannot be
......@@ -2789,9 +2790,9 @@ btr_cur_del_unmark_for_ibuf(
updated in place and the adaptive hash index does not depend
on it. */
btr_rec_set_deleted_flag(rec, page_zip, FALSE);
btr_rec_set_deleted_flag(rec, page_zip, val);
btr_cur_del_mark_set_sec_rec_log(rec, FALSE, mtr);
btr_cur_del_mark_set_sec_rec_log(rec, val, mtr);
}
/*==================== B-TREE RECORD REMOVE =========================*/
......
......@@ -11242,8 +11242,8 @@ static MYSQL_SYSVAR_ENUM(stats_method, srv_innodb_stats_method,
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug,
PLUGIN_VAR_RQCMDARG,
"Debug flags for InnoDB change buffering (0=none)",
NULL, NULL, 0, 0, 1, 0);
"Debug flags for InnoDB change buffering (0=none, 2=crash at merge)",
NULL, NULL, 0, 0, 2, 0);
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
static MYSQL_SYSVAR_BOOL(random_read_ahead, srv_random_read_ahead,
......
......@@ -3042,7 +3042,8 @@ ibuf_insert_to_index_page(
/* The records only differ in the delete-mark.
Clear the delete-mark, like we did before
Bug #56680 was fixed. */
btr_cur_del_unmark_for_ibuf(rec, page_zip, mtr);
btr_cur_set_deleted_flag_for_ibuf(
rec, page_zip, FALSE, mtr);
updated_in_place:
mem_heap_free(heap);
return;
......@@ -3127,6 +3128,22 @@ ibuf_delete_rec(
ut_ad(ibuf_rec_get_page_no(btr_pcur_get_rec(pcur)) == page_no);
ut_ad(ibuf_rec_get_space(btr_pcur_get_rec(pcur)) == space);
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
if (ibuf_debug == 2) {
/* Inject a fault (crash). We do this before trying
optimistic delete, because a pessimistic delete in the
change buffer would require a larger test case. */
/* Flag the buffered record as processed, to avoid
an assertion failure after crash recovery. */
btr_cur_set_deleted_flag_for_ibuf(
btr_pcur_get_rec(pcur), NULL, TRUE, mtr);
mtr_commit(mtr);
log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
DBUG_SUICIDE();
}
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur), mtr);
if (success) {
......@@ -3145,7 +3162,13 @@ ibuf_delete_rec(
ut_ad(ibuf_rec_get_page_no(btr_pcur_get_rec(pcur)) == page_no);
ut_ad(ibuf_rec_get_space(btr_pcur_get_rec(pcur)) == space);
/* We have to resort to a pessimistic delete from ibuf */
/* We have to resort to a pessimistic delete from ibuf.
Delete-mark the record so that it will not be applied again,
in case the server crashes before the pessimistic delete is
made persistent. */
btr_cur_set_deleted_flag_for_ibuf(
btr_pcur_get_rec(pcur), NULL, TRUE, mtr);
btr_pcur_store_position(pcur, mtr);
btr_pcur_commit_specify_mtr(pcur, mtr);
......@@ -3454,7 +3477,7 @@ ibuf_merge_or_delete_for_page(
fputs("InnoDB: Discarding record\n ", stderr);
rec_print_old(stderr, rec);
fputs("\nInnoDB: from the insert buffer!\n\n", stderr);
} else if (block) {
} else if (block && !rec_get_deleted_flag(rec, 0)) {
/* Now we have at pcur a record which should be
inserted to the index page; NOTE that the call below
copies pointers to fields in rec, and we must
......
......@@ -357,19 +357,20 @@ btr_cur_del_mark_set_sec_rec(
ibool val, /*!< in: value to set */
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr); /*!< in: mtr */
/***********************************************************//**
Clear a secondary index record's delete mark. This function is only
/***************************************************************
Sets a secondary index record delete mark. This function is only
used by the insert buffer insert merge mechanism. */
UNIV_INTERN
void
btr_cur_del_unmark_for_ibuf(
/*========================*/
btr_cur_set_deleted_flag_for_ibuf(
/*==============================*/
rec_t* rec, /*!< in/out: record to delete unmark */
page_zip_des_t* page_zip, /*!< in/out: compressed page
corresponding to rec, or NULL
when the tablespace is
uncompressed */
mtr_t* mtr); /*!< in: mtr */
ibool val, /*!< in: value to set */
mtr_t* mtr); /*!< in/out: mini-transaction */
/*************************************************************//**
Tries to compress a page of the tree if it seems useful. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. To avoid
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment