Commit ba78ee0d authored by marko

branches/zip: In the rollback of incomplete transactions after crash
recovery, tolerate clustered index records whose externally stored
columns have not been written.  This should remove the assertion failures
that were reported as Mantis issue#58, issue#62 and issue#64.

trx_is_recv(): New function: returns TRUE if the given transaction is an
incomplete transaction that is being rolled back in crash recovery.

enum trx_rbmode: Rollback modes: no rollback, normal rollback, crash recovery.

btr_cur_pessimistic_delete(), btr_free_externally_stored_field(),
btr_rec_free_externally_stored_fields():
Replace the ibool parameter with enum trx_rbmode.

btr_free_externally_stored_field(): If field_ref is all zero, assert
ut_a(rbmode == RB_RECOVERY) and return without freeing anything.  Unless
InnoDB has crashed while inserting a clustered index record, field_ref
should not be zero.

btr_rec_free_updated_extern_fields(): Add the parameter enum trx_rbmode.

btr_cur_pessimistic_update(): Pass the rbmode parameter to
btr_rec_free_updated_extern_fields().

row_undo_ins(), row_undo_mod_upd_del_sec(): If row_build_index_entry()
fails, assert trx_is_recv() and skip this secondary index.

row_undo_mod_upd_del_sec(): Empty the heap at the end of each loop
iteration in order to conserve memory and to reduce the number of
low-level memory allocations.
parent 6cddd9c5
...@@ -2202,7 +2202,7 @@ btr_node_ptr_delete( ...@@ -2202,7 +2202,7 @@ btr_node_ptr_delete(
/* Delete node pointer on father page */ /* Delete node pointer on father page */
btr_page_get_father(index, block, mtr, &cursor); btr_page_get_father(index, block, mtr, &cursor);
compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor, FALSE, compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor, RB_NONE,
mtr); mtr);
ut_a(err == DB_SUCCESS); ut_a(err == DB_SUCCESS);
......
...@@ -32,6 +32,7 @@ Created 10/16/1994 Heikki Tuuri ...@@ -32,6 +32,7 @@ Created 10/16/1994 Heikki Tuuri
#include "btr0sea.h" #include "btr0sea.h"
#include "row0upd.h" #include "row0upd.h"
#include "trx0rec.h" #include "trx0rec.h"
#include "trx0roll.h" /* trx_is_recv() */
#include "que0que.h" #include "que0que.h"
#include "row0row.h" #include "row0row.h"
#include "srv0srv.h" #include "srv0srv.h"
...@@ -112,6 +113,7 @@ btr_rec_free_updated_extern_fields( ...@@ -112,6 +113,7 @@ btr_rec_free_updated_extern_fields(
part will be updated, or NULL */ part will be updated, or NULL */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
const upd_t* update, /* in: update vector */ const upd_t* update, /* in: update vector */
enum trx_rbmode rbmode, /* in: rollback mode */
mtr_t* mtr); /* in: mini-transaction handle which contains mtr_t* mtr); /* in: mini-transaction handle which contains
an X-latch to record page and to the tree */ an X-latch to record page and to the tree */
/*************************************************************** /***************************************************************
...@@ -126,9 +128,7 @@ btr_rec_free_externally_stored_fields( ...@@ -126,9 +128,7 @@ btr_rec_free_externally_stored_fields(
const ulint* offsets,/* in: rec_get_offsets(rec, index) */ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
page_zip_des_t* page_zip,/* in: compressed page whose uncompressed page_zip_des_t* page_zip,/* in: compressed page whose uncompressed
part will be updated, or NULL */ part will be updated, or NULL */
ibool do_not_free_inherited,/* in: TRUE if called in a enum trx_rbmode rbmode, /* in: rollback mode */
rollback and we do not want to free
inherited fields */
mtr_t* mtr); /* in: mini-transaction handle which contains mtr_t* mtr); /* in: mini-transaction handle which contains
an X-latch to record page and to the index an X-latch to record page and to the index
tree */ tree */
...@@ -2162,8 +2162,9 @@ btr_cur_pessimistic_update( ...@@ -2162,8 +2162,9 @@ btr_cur_pessimistic_update(
ut_ad(big_rec_vec == NULL); ut_ad(big_rec_vec == NULL);
btr_rec_free_updated_extern_fields(index, rec, page_zip, btr_rec_free_updated_extern_fields(
offsets, update, mtr); index, rec, page_zip, offsets, update,
trx_is_recv(trx) ? RB_RECOVERY : RB_NORMAL, mtr);
} }
/* We have to set appropriate extern storage bits in the new /* We have to set appropriate extern storage bits in the new
...@@ -2811,7 +2812,7 @@ btr_cur_pessimistic_delete( ...@@ -2811,7 +2812,7 @@ btr_cur_pessimistic_delete(
if compression does not occur, the cursor if compression does not occur, the cursor
stays valid: it points to successor of stays valid: it points to successor of
deleted record on function exit */ deleted record on function exit */
ibool in_rollback,/* in: TRUE if called in rollback */ enum trx_rbmode rbmode, /* in: rollback mode */
mtr_t* mtr) /* in: mtr */ mtr_t* mtr) /* in: mtr */
{ {
buf_block_t* block; buf_block_t* block;
...@@ -2865,7 +2866,7 @@ btr_cur_pessimistic_delete( ...@@ -2865,7 +2866,7 @@ btr_cur_pessimistic_delete(
if (rec_offs_any_extern(offsets)) { if (rec_offs_any_extern(offsets)) {
btr_rec_free_externally_stored_fields(index, btr_rec_free_externally_stored_fields(index,
rec, offsets, page_zip, rec, offsets, page_zip,
in_rollback, mtr); rbmode, mtr);
#ifdef UNIV_ZIP_DEBUG #ifdef UNIV_ZIP_DEBUG
ut_a(!page_zip || page_zip_validate(page_zip, page)); ut_a(!page_zip || page_zip_validate(page_zip, page));
#endif /* UNIV_ZIP_DEBUG */ #endif /* UNIV_ZIP_DEBUG */
...@@ -4084,9 +4085,7 @@ btr_free_externally_stored_field( ...@@ -4084,9 +4085,7 @@ btr_free_externally_stored_field(
to rec, or NULL if rec == NULL */ to rec, or NULL if rec == NULL */
ulint i, /* in: field number of field_ref; ulint i, /* in: field number of field_ref;
ignored if rec == NULL */ ignored if rec == NULL */
ibool do_not_free_inherited,/* in: TRUE if called in a enum trx_rbmode rbmode, /* in: rollback mode */
rollback and we do not want to free
inherited fields */
mtr_t* local_mtr __attribute__((unused))) /* in: mtr mtr_t* local_mtr __attribute__((unused))) /* in: mtr
containing the latch to data an an containing the latch to data an an
X-latch to the index tree */ X-latch to the index tree */
...@@ -4116,6 +4115,15 @@ btr_free_externally_stored_field( ...@@ -4116,6 +4115,15 @@ btr_free_externally_stored_field(
} }
#endif /* UNIV_DEBUG */ #endif /* UNIV_DEBUG */
if (UNIV_UNLIKELY(!memcmp(field_ref, field_ref_zero,
BTR_EXTERN_FIELD_REF_SIZE))) {
/* In the rollback of uncommitted transactions, we may
encounter a clustered index record whose BLOBs have
not been written. There is nothing to free then. */
ut_a(rbmode == RB_RECOVERY);
return;
}
space_id = mach_read_from_4(field_ref + BTR_EXTERN_SPACE_ID); space_id = mach_read_from_4(field_ref + BTR_EXTERN_SPACE_ID);
if (UNIV_UNLIKELY(space_id != dict_index_get_space(index))) { if (UNIV_UNLIKELY(space_id != dict_index_get_space(index))) {
...@@ -4160,7 +4168,7 @@ btr_free_externally_stored_field( ...@@ -4160,7 +4168,7 @@ btr_free_externally_stored_field(
|| (mach_read_from_1(field_ref + BTR_EXTERN_LEN) || (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
& BTR_EXTERN_OWNER_FLAG) & BTR_EXTERN_OWNER_FLAG)
/* Rollback and inherited field */ /* Rollback and inherited field */
|| (do_not_free_inherited || (rbmode != RB_NONE
&& (mach_read_from_1(field_ref + BTR_EXTERN_LEN) && (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
& BTR_EXTERN_INHERITED_FLAG))) { & BTR_EXTERN_INHERITED_FLAG))) {
...@@ -4262,9 +4270,7 @@ btr_rec_free_externally_stored_fields( ...@@ -4262,9 +4270,7 @@ btr_rec_free_externally_stored_fields(
const ulint* offsets,/* in: rec_get_offsets(rec, index) */ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
page_zip_des_t* page_zip,/* in: compressed page whose uncompressed page_zip_des_t* page_zip,/* in: compressed page whose uncompressed
part will be updated, or NULL */ part will be updated, or NULL */
ibool do_not_free_inherited,/* in: TRUE if called in a enum trx_rbmode rbmode, /* in: rollback mode */
rollback and we do not want to free
inherited fields */
mtr_t* mtr) /* in: mini-transaction handle which contains mtr_t* mtr) /* in: mini-transaction handle which contains
an X-latch to record page and to the index an X-latch to record page and to the index
tree */ tree */
...@@ -4288,8 +4294,7 @@ btr_rec_free_externally_stored_fields( ...@@ -4288,8 +4294,7 @@ btr_rec_free_externally_stored_fields(
btr_free_externally_stored_field( btr_free_externally_stored_field(
index, data + len - BTR_EXTERN_FIELD_REF_SIZE, index, data + len - BTR_EXTERN_FIELD_REF_SIZE,
rec, offsets, page_zip, i, rec, offsets, page_zip, i, rbmode, mtr);
do_not_free_inherited, mtr);
} }
} }
} }
...@@ -4308,6 +4313,7 @@ btr_rec_free_updated_extern_fields( ...@@ -4308,6 +4313,7 @@ btr_rec_free_updated_extern_fields(
part will be updated, or NULL */ part will be updated, or NULL */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
const upd_t* update, /* in: update vector */ const upd_t* update, /* in: update vector */
enum trx_rbmode rbmode, /* in: rollback mode */
mtr_t* mtr) /* in: mini-transaction handle which contains mtr_t* mtr) /* in: mini-transaction handle which contains
an X-latch to record page and to the tree */ an X-latch to record page and to the tree */
{ {
...@@ -4333,7 +4339,7 @@ btr_rec_free_updated_extern_fields( ...@@ -4333,7 +4339,7 @@ btr_rec_free_updated_extern_fields(
btr_free_externally_stored_field( btr_free_externally_stored_field(
index, data + len - BTR_EXTERN_FIELD_REF_SIZE, index, data + len - BTR_EXTERN_FIELD_REF_SIZE,
rec, offsets, page_zip, rec, offsets, page_zip,
ufield->field_no, TRUE, mtr); ufield->field_no, rbmode, mtr);
} }
} }
} }
......
...@@ -3140,7 +3140,7 @@ ibuf_delete_rec( ...@@ -3140,7 +3140,7 @@ ibuf_delete_rec(
root = ibuf_tree_root_get(ibuf_data, 0, mtr); root = ibuf_tree_root_get(ibuf_data, 0, mtr);
btr_cur_pessimistic_delete(&err, TRUE, btr_pcur_get_btr_cur(pcur), btr_cur_pessimistic_delete(&err, TRUE, btr_pcur_get_btr_cur(pcur),
FALSE, mtr); RB_NONE, mtr);
ut_a(err == DB_SUCCESS); ut_a(err == DB_SUCCESS);
#ifdef UNIV_IBUF_COUNT_DEBUG #ifdef UNIV_IBUF_COUNT_DEBUG
......
...@@ -379,7 +379,7 @@ btr_cur_pessimistic_delete( ...@@ -379,7 +379,7 @@ btr_cur_pessimistic_delete(
if compression does not occur, the cursor if compression does not occur, the cursor
stays valid: it points to successor of stays valid: it points to successor of
deleted record on function exit */ deleted record on function exit */
ibool in_rollback,/* in: TRUE if called in rollback */ enum trx_rbmode rbmode, /* in: rollback mode */
mtr_t* mtr); /* in: mtr */ mtr_t* mtr); /* in: mtr */
/*************************************************************** /***************************************************************
Parses a redo log record of updating a record in-place. */ Parses a redo log record of updating a record in-place. */
...@@ -521,9 +521,7 @@ btr_free_externally_stored_field( ...@@ -521,9 +521,7 @@ btr_free_externally_stored_field(
to rec, or NULL if rec == NULL */ to rec, or NULL if rec == NULL */
ulint i, /* in: field number of field_ref; ulint i, /* in: field number of field_ref;
ignored if rec == NULL */ ignored if rec == NULL */
ibool do_not_free_inherited,/* in: TRUE if called in a enum trx_rbmode rbmode, /* in: rollback mode */
rollback and we do not want to free
inherited fields */
mtr_t* local_mtr); /* in: mtr containing the latch to mtr_t* local_mtr); /* in: mtr containing the latch to
data an an X-latch to the index data an an X-latch to the index
tree */ tree */
......
...@@ -15,6 +15,17 @@ Created 3/26/1996 Heikki Tuuri ...@@ -15,6 +15,17 @@ Created 3/26/1996 Heikki Tuuri
#include "mtr0mtr.h" #include "mtr0mtr.h"
#include "trx0sys.h" #include "trx0sys.h"
/***********************************************************************
Determines if this transaction is rolling back an incomplete transaction
in crash recovery. Callers use the result to choose between the
RB_RECOVERY and RB_NORMAL rollback modes, and to assert that a secondary
index entry which could not be built is being skipped only during the
rollback of an incomplete transaction. */
UNIV_INTERN
ibool
trx_is_recv(
/*========*/
/* out: TRUE if trx is an incomplete
transaction that is being rolled back
in crash recovery */
const trx_t* trx); /* in: transaction to test */
/*********************************************************************** /***********************************************************************
Returns a transaction savepoint taken at this point in time. */ Returns a transaction savepoint taken at this point in time. */
UNIV_INTERN UNIV_INTERN
......
...@@ -36,6 +36,14 @@ typedef struct roll_node_struct roll_node_t; ...@@ -36,6 +36,14 @@ typedef struct roll_node_struct roll_node_t;
typedef struct commit_node_struct commit_node_t; typedef struct commit_node_struct commit_node_t;
typedef struct trx_named_savept_struct trx_named_savept_t; typedef struct trx_named_savept_struct trx_named_savept_t;
/* Rollback modes. Passed to btr_cur_pessimistic_delete() and to the
functions that free externally stored fields, so that they know whether
they are called from a rollback and, if so, from which kind:
- any mode other than RB_NONE keeps inherited externally stored fields
  from being freed;
- only RB_RECOVERY tolerates an all-zero field reference, i.e. a
  clustered index record whose externally stored columns were never
  written before the crash.
NOTE: there is deliberately no comma after the last enumerator; a
trailing comma in an enumerator list is not valid C89/C90. */
enum trx_rbmode {
	RB_NONE = 0,	/* no rollback */
	RB_NORMAL,	/* normal rollback */
	RB_RECOVERY	/* rolling back an incomplete transaction,
			in crash recovery */
};
/* Transaction savepoint */ /* Transaction savepoint */
typedef struct trx_savept_struct trx_savept_t; typedef struct trx_savept_struct trx_savept_t;
struct trx_savept_struct{ struct trx_savept_struct{
......
...@@ -144,7 +144,8 @@ row_purge_remove_clust_if_poss_low( ...@@ -144,7 +144,8 @@ row_purge_remove_clust_if_poss_low(
success = btr_cur_optimistic_delete(btr_cur, &mtr); success = btr_cur_optimistic_delete(btr_cur, &mtr);
} else { } else {
ut_ad(mode == BTR_MODIFY_TREE); ut_ad(mode == BTR_MODIFY_TREE);
btr_cur_pessimistic_delete(&err, FALSE, btr_cur, FALSE, &mtr); btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
RB_NONE, &mtr);
if (err == DB_SUCCESS) { if (err == DB_SUCCESS) {
success = TRUE; success = TRUE;
...@@ -265,7 +266,7 @@ row_purge_remove_sec_if_poss_low( ...@@ -265,7 +266,7 @@ row_purge_remove_sec_if_poss_low(
} else { } else {
ut_ad(mode == BTR_MODIFY_TREE); ut_ad(mode == BTR_MODIFY_TREE);
btr_cur_pessimistic_delete(&err, FALSE, btr_cur, btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
FALSE, &mtr); RB_NONE, &mtr);
success = err == DB_SUCCESS; success = err == DB_SUCCESS;
ut_a(success || err == DB_OUT_OF_FILE_SPACE); ut_a(success || err == DB_OUT_OF_FILE_SPACE);
} }
...@@ -458,7 +459,7 @@ skip_secondaries: ...@@ -458,7 +459,7 @@ skip_secondaries:
index, index,
data_field + dfield_get_len(&ufield->new_val) data_field + dfield_get_len(&ufield->new_val)
- BTR_EXTERN_FIELD_REF_SIZE, - BTR_EXTERN_FIELD_REF_SIZE,
NULL, NULL, NULL, 0, FALSE, &mtr); NULL, NULL, NULL, 0, RB_NONE, &mtr);
mtr_commit(&mtr); mtr_commit(&mtr);
} }
} }
......
...@@ -87,7 +87,10 @@ retry: ...@@ -87,7 +87,10 @@ retry:
&(node->pcur), &mtr); &(node->pcur), &mtr);
ut_a(success); ut_a(success);
btr_cur_pessimistic_delete(&err, FALSE, btr_cur, TRUE, &mtr); btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
trx_is_recv(node->trx)
? RB_RECOVERY
: RB_NORMAL, &mtr);
/* The delete operation may fail if we have little /* The delete operation may fail if we have little
file space left: TODO: easiest to crash the database file space left: TODO: easiest to crash the database
...@@ -160,7 +163,14 @@ row_undo_ins_remove_sec_low( ...@@ -160,7 +163,14 @@ row_undo_ins_remove_sec_low(
} else { } else {
ut_ad(mode == BTR_MODIFY_TREE); ut_ad(mode == BTR_MODIFY_TREE);
btr_cur_pessimistic_delete(&err, FALSE, btr_cur, TRUE, &mtr); /* No need to distinguish RB_RECOVERY here, because we
are deleting a secondary index record: the distinction
between RB_NORMAL and RB_RECOVERY only matters when
deleting a record that contains externally stored
columns. */
ut_ad(!dict_index_is_clust(index));
btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
RB_NORMAL, &mtr);
} }
btr_pcur_close(&pcur); btr_pcur_close(&pcur);
...@@ -284,13 +294,25 @@ row_undo_ins( ...@@ -284,13 +294,25 @@ row_undo_ins(
entry = row_build_index_entry(node->row, node->ext, entry = row_build_index_entry(node->row, node->ext,
node->index, node->heap); node->index, node->heap);
ut_a(entry); if (UNIV_UNLIKELY(!entry)) {
/* The database must have crashed after
inserting a clustered index record but before
writing all the externally stored columns of
that record. Because secondary index entries
are inserted after the clustered index record,
we may assume that the secondary index record
does not exist. However, this situation may
only occur during the rollback of incomplete
transactions. */
ut_a(trx_is_recv(node->trx));
} else {
err = row_undo_ins_remove_sec(node->index, entry); err = row_undo_ins_remove_sec(node->index, entry);
if (err != DB_SUCCESS) { if (err != DB_SUCCESS) {
return(err); return(err);
} }
}
node->index = dict_table_get_next_index(node->index); node->index = dict_table_get_next_index(node->index);
} }
......
...@@ -178,9 +178,9 @@ row_undo_mod_remove_clust_low( ...@@ -178,9 +178,9 @@ row_undo_mod_remove_clust_low(
/* Note that since this operation is analogous to purge, /* Note that since this operation is analogous to purge,
we can free also inherited externally stored fields: we can free also inherited externally stored fields:
hence the last FALSE in the call below */ hence the RB_NONE in the call below */
btr_cur_pessimistic_delete(&err, FALSE, btr_cur, FALSE, mtr); btr_cur_pessimistic_delete(&err, FALSE, btr_cur, RB_NONE, mtr);
/* The delete operation may fail if we have little /* The delete operation may fail if we have little
file space left: TODO: easiest to crash the database file space left: TODO: easiest to crash the database
...@@ -350,8 +350,14 @@ row_undo_mod_del_mark_or_remove_sec_low( ...@@ -350,8 +350,14 @@ row_undo_mod_del_mark_or_remove_sec_low(
} else { } else {
ut_ad(mode == BTR_MODIFY_TREE); ut_ad(mode == BTR_MODIFY_TREE);
/* No need to distinguish RB_RECOVERY here, because we
are deleting a secondary index record: the distinction
between RB_NORMAL and RB_RECOVERY only matters when
deleting a record that contains externally stored
columns. */
ut_ad(!dict_index_is_clust(index));
btr_cur_pessimistic_delete(&err, FALSE, btr_cur, btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
TRUE, &mtr); RB_NORMAL, &mtr);
/* The delete operation may fail if we have little /* The delete operation may fail if we have little
file space left: TODO: easiest to crash the database file space left: TODO: easiest to crash the database
...@@ -506,7 +512,7 @@ row_undo_mod_upd_del_sec( ...@@ -506,7 +512,7 @@ row_undo_mod_upd_del_sec(
mem_heap_t* heap; mem_heap_t* heap;
dtuple_t* entry; dtuple_t* entry;
dict_index_t* index; dict_index_t* index;
ulint err; ulint err = DB_SUCCESS;
heap = mem_heap_create(1024); heap = mem_heap_create(1024);
...@@ -515,22 +521,35 @@ row_undo_mod_upd_del_sec( ...@@ -515,22 +521,35 @@ row_undo_mod_upd_del_sec(
entry = row_build_index_entry(node->row, node->ext, entry = row_build_index_entry(node->row, node->ext,
index, heap); index, heap);
ut_a(entry); if (UNIV_UNLIKELY(!entry)) {
err = row_undo_mod_del_mark_or_remove_sec(node, thr, index, /* The database must have crashed after
entry); inserting a clustered index record but before
if (err != DB_SUCCESS) { writing all the externally stored columns of
that record. Because secondary index entries
are inserted after the clustered index record,
we may assume that the secondary index record
does not exist. However, this situation may
only occur during the rollback of incomplete
transactions. */
ut_a(trx_is_recv(thr_get_trx(thr)));
} else {
err = row_undo_mod_del_mark_or_remove_sec(
node, thr, index, entry);
mem_heap_free(heap); if (err != DB_SUCCESS) {
return(err); break;
}
} }
mem_heap_empty(heap);
node->index = dict_table_get_next_index(node->index); node->index = dict_table_get_next_index(node->index);
} }
mem_heap_free(heap); mem_heap_free(heap);
return(DB_SUCCESS); return(err);
} }
/*************************************************************** /***************************************************************
......
...@@ -373,6 +373,21 @@ trx_release_savepoint_for_mysql( ...@@ -373,6 +373,21 @@ trx_release_savepoint_for_mysql(
return(DB_SUCCESS); return(DB_SUCCESS);
} }
/***********************************************************************
Tells whether the given transaction is the incomplete transaction that
crash recovery is currently rolling back. The check is a pointer
comparison against trx_roll_crash_recv_trx. */
UNIV_INTERN
ibool
trx_is_recv(
/*========*/
				/* out: TRUE if trx is an incomplete
				transaction that is being rolled back
				in crash recovery */
	const trx_t*	trx)	/* in: transaction */
{
	const ibool	in_crash_rollback = (trx == trx_roll_crash_recv_trx);

	return(in_crash_rollback);
}
/*********************************************************************** /***********************************************************************
Returns a transaction savepoint taken at this point in time. */ Returns a transaction savepoint taken at this point in time. */
UNIV_INTERN UNIV_INTERN
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment