Commit b36da66b authored by Marko Mäkelä

Revert most of revno 3560.9.1 (Bug#12704861)

This was an attempt to address problems with the Bug#12612184 fix.
Even with this follow-up fix, crash recovery can be broken.
Let us fix the bug later.
parent e27623a7
...@@ -1094,6 +1094,20 @@ COMMIT; ...@@ -1094,6 +1094,20 @@ COMMIT;
UPDATE bug12547647 SET c = REPEAT('b',16928); UPDATE bug12547647 SET c = REPEAT('b',16928);
ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. You have to change some columns to TEXT or BLOBs ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. You have to change some columns to TEXT or BLOBs
DROP TABLE bug12547647; DROP TABLE bug12547647;
SET @r=REPEAT('a',500);
CREATE TABLE t1(a INT,
v1 VARCHAR(500), v2 VARCHAR(500), v3 VARCHAR(500),
v4 VARCHAR(500), v5 VARCHAR(500), v6 VARCHAR(500),
v7 VARCHAR(500), v8 VARCHAR(500), v9 VARCHAR(500),
v10 VARCHAR(500), v11 VARCHAR(500), v12 VARCHAR(500),
v13 VARCHAR(500), v14 VARCHAR(500), v15 VARCHAR(500),
v16 VARCHAR(500), v17 VARCHAR(500), v18 VARCHAR(500)
) ENGINE=InnoDB ROW_FORMAT=DYNAMIC;
CREATE INDEX idx1 ON t1(a,v1);
INSERT INTO t1 VALUES(9,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r);
UPDATE t1 SET a=1000;
DELETE FROM t1;
DROP TABLE t1;
set global innodb_file_per_table=0; set global innodb_file_per_table=0;
set global innodb_file_format=Antelope; set global innodb_file_format=Antelope;
set global innodb_file_format_check=Antelope; set global innodb_file_format_check=Antelope;
......
...@@ -527,13 +527,30 @@ CREATE TABLE bug12547647( ...@@ -527,13 +527,30 @@ CREATE TABLE bug12547647(
a INT NOT NULL, b BLOB NOT NULL, c TEXT, a INT NOT NULL, b BLOB NOT NULL, c TEXT,
PRIMARY KEY (b(10), a), INDEX (c(10)) PRIMARY KEY (b(10), a), INDEX (c(10))
) ENGINE=InnoDB ROW_FORMAT=DYNAMIC; ) ENGINE=InnoDB ROW_FORMAT=DYNAMIC;
INSERT INTO bug12547647 VALUES (5,repeat('khdfo5AlOq',1900),repeat('g',7731)); INSERT INTO bug12547647 VALUES (5,repeat('khdfo5AlOq',1900),repeat('g',7731));
COMMIT; COMMIT;
# The following used to cause infinite undo log allocation. # The following used to cause infinite undo log allocation.
--error ER_TOO_BIG_ROWSIZE --error ER_TOO_BIG_ROWSIZE
UPDATE bug12547647 SET c = REPEAT('b',16928); UPDATE bug12547647 SET c = REPEAT('b',16928);
DROP TABLE bug12547647; DROP TABLE bug12547647;
# Bug#12637786
# Scenario: a ROW_FORMAT=DYNAMIC table with 18 VARCHAR(500) columns and a
# secondary index on (a,v1). One row is inserted with every VARCHAR column
# set to 500 'a' characters (@r); then the indexed column a is updated and
# the row is deleted.
SET @r=REPEAT('a',500);
CREATE TABLE t1(a INT,
v1 VARCHAR(500), v2 VARCHAR(500), v3 VARCHAR(500),
v4 VARCHAR(500), v5 VARCHAR(500), v6 VARCHAR(500),
v7 VARCHAR(500), v8 VARCHAR(500), v9 VARCHAR(500),
v10 VARCHAR(500), v11 VARCHAR(500), v12 VARCHAR(500),
v13 VARCHAR(500), v14 VARCHAR(500), v15 VARCHAR(500),
v16 VARCHAR(500), v17 VARCHAR(500), v18 VARCHAR(500)
) ENGINE=InnoDB ROW_FORMAT=DYNAMIC;
CREATE INDEX idx1 ON t1(a,v1);
INSERT INTO t1 VALUES(9,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r,@r);
UPDATE t1 SET a=1000;
DELETE FROM t1;
# Let the purge thread clean up this file.
# NOTE(review): this is a fixed 10-second wait, so the test is
# timing-dependent; presumably purge is expected to have processed the
# delete-marked row by then. Consider a wait condition if one is available.
-- sleep 10
DROP TABLE t1;
eval set global innodb_file_per_table=$per_table; eval set global innodb_file_per_table=$per_table;
eval set global innodb_file_format=$format; eval set global innodb_file_format=$format;
......
...@@ -300,30 +300,29 @@ btr_page_alloc_for_ibuf( ...@@ -300,30 +300,29 @@ btr_page_alloc_for_ibuf(
/****************************************************************** /******************************************************************
Allocates a new file page to be used in an index tree. NOTE: we assume Allocates a new file page to be used in an index tree. NOTE: we assume
that the caller has made the reservation for free extents! */ that the caller has made the reservation for free extents! */
static
ulint page_t*
btr_page_alloc_low( btr_page_alloc(
/*===============*/ /*===========*/
/* out: allocated page number, /* out: new allocated page, x-latched;
FIL_NULL if out of space */ NULL if out of space */
dict_index_t* index, /* in: index */ dict_index_t* index, /* in: index */
ulint hint_page_no, /* in: hint of a good page */ ulint hint_page_no, /* in: hint of a good page */
byte file_direction, /* in: direction where a possible byte file_direction, /* in: direction where a possible
page split is made */ page split is made */
ulint level, /* in: level where the page is placed ulint level, /* in: level where the page is placed
in the tree */ in the tree */
mtr_t* mtr, /* in/out: mini-transaction mtr_t* mtr) /* in: mtr */
for the allocation */
mtr_t* init_mtr) /* in/out: mini-transaction
in which the page should be
initialized (may be the same
as mtr), or NULL if it should
not be initialized (the page
at hint was previously freed
in mtr) */
{ {
fseg_header_t* seg_header; fseg_header_t* seg_header;
page_t* root; page_t* root;
page_t* new_page;
ulint new_page_no;
if (index->type & DICT_IBUF) {
return(btr_page_alloc_for_ibuf(index, mtr));
}
root = btr_root_get(index, mtr); root = btr_root_get(index, mtr);
...@@ -337,61 +336,19 @@ btr_page_alloc_low( ...@@ -337,61 +336,19 @@ btr_page_alloc_low(
reservation for free extents, and thus we know that a page can reservation for free extents, and thus we know that a page can
be allocated: */ be allocated: */
return(fseg_alloc_free_page_general(seg_header, hint_page_no, new_page_no = fseg_alloc_free_page_general(seg_header, hint_page_no,
file_direction, TRUE, file_direction, TRUE, mtr);
mtr, init_mtr));
}
/**************************************************************//**
Allocates a new file page to be used in an index tree. NOTE: we assume
that the caller has made the reservation for free extents! */
page_t*
btr_page_alloc(
/*===========*/
/* out: new allocated block, x-latched;
NULL if out of space */
dict_index_t* index, /* in: index */
ulint hint_page_no, /* in: hint of a good page */
byte file_direction, /* in: direction where a possible
page split is made */
ulint level, /* in: level where the page is placed
in the tree */
mtr_t* mtr, /* in/out: mini-transaction
for the allocation */
mtr_t* init_mtr) /* in/out: mini-transaction
for x-latching and initializing
the page */
{
page_t* new_page;
ulint new_page_no;
if (index->type & DICT_IBUF) {
return(btr_page_alloc_for_ibuf(index, mtr));
}
new_page_no = btr_page_alloc_low(
index, hint_page_no, file_direction, level, mtr, init_mtr);
if (new_page_no == FIL_NULL) { if (new_page_no == FIL_NULL) {
return(NULL); return(NULL);
} }
new_page = buf_page_get(dict_index_get_space(index), new_page_no, new_page = buf_page_get(dict_index_get_space(index), new_page_no,
RW_X_LATCH, init_mtr); RW_X_LATCH, mtr);
#ifdef UNIV_SYNC_DEBUG #ifdef UNIV_SYNC_DEBUG
buf_page_dbg_add_level(new_page, SYNC_TREE_NODE_NEW); buf_page_dbg_add_level(new_page, SYNC_TREE_NODE_NEW);
#endif /* UNIV_SYNC_DEBUG */ #endif /* UNIV_SYNC_DEBUG */
if (mtr->freed_clust_leaf) {
mtr_memo_release(mtr, new_page, MTR_MEMO_FREE_CLUST_LEAF);
ut_ad(!mtr_memo_contains(mtr, buf_block_align(new_page),
MTR_MEMO_FREE_CLUST_LEAF));
}
ut_ad(btr_freed_leaves_validate(mtr));
return(new_page); return(new_page);
} }
...@@ -538,138 +495,8 @@ btr_page_free( ...@@ -538,138 +495,8 @@ btr_page_free(
level = btr_page_get_level(page, mtr); level = btr_page_get_level(page, mtr);
btr_page_free_low(index, page, level, mtr); btr_page_free_low(index, page, level, mtr);
/* The handling of MTR_MEMO_FREE_CLUST_LEAF assumes this. */
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
MTR_MEMO_PAGE_X_FIX));
if (level == 0 && (index->type & DICT_CLUSTERED)) {
/* We may have to call btr_mark_freed_leaves() to
temporarily mark the block nonfree for invoking
btr_store_big_rec_extern_fields() after an
update. Remember that the block was freed. */
mtr->freed_clust_leaf = TRUE;
mtr_memo_push(mtr, buf_block_align(page),
MTR_MEMO_FREE_CLUST_LEAF);
}
ut_ad(btr_freed_leaves_validate(mtr));
} }
/**************************************************************//**
Marks all MTR_MEMO_FREE_CLUST_LEAF pages nonfree or free.
For invoking btr_store_big_rec_extern_fields() after an update,
we must temporarily mark freed clustered index pages allocated, so
that off-page columns will not be allocated from them. Between the
btr_store_big_rec_extern_fields() and mtr_commit() we have to
mark the pages free again, so that no pages will be leaked.

The function walks the memo of mtr from the last slot to the first
and acts only on slots of type MTR_MEMO_FREE_CLUST_LEAF. It returns
immediately when mtr->freed_clust_leaf is not set. */
void
btr_mark_freed_leaves(
/*==================*/
dict_index_t* index, /* in/out: clustered index */
mtr_t* mtr, /* in/out: mini-transaction */
ibool nonfree)/* in: TRUE=mark nonfree, FALSE=mark freed */
{
/* This is loosely based on mtr_memo_release(). */
ulint offset;
ut_ad(index->type & DICT_CLUSTERED);
ut_ad(mtr->magic_n == MTR_MAGIC_N);
ut_ad(mtr->state == MTR_ACTIVE);
/* Fast path: no MTR_MEMO_FREE_CLUST_LEAF entry was recorded in
this mini-transaction, so there is nothing to mark. */
if (!mtr->freed_clust_leaf) {
return;
}
/* Start past the last memo slot and step backwards one slot
at a time until the beginning of the memo is reached. */
offset = dyn_array_get_data_size(&mtr->memo);
while (offset > 0) {
mtr_memo_slot_t* slot;
buf_block_t* block;
offset -= sizeof *slot;
slot = dyn_array_get_element(&mtr->memo, offset);
/* Skip every memo entry that is not a freed-leaf marker. */
if (slot->type != MTR_MEMO_FREE_CLUST_LEAF) {
continue;
}
/* Because btr_page_alloc() does invoke
mtr_memo_release on MTR_MEMO_FREE_CLUST_LEAF, all
blocks tagged with MTR_MEMO_FREE_CLUST_LEAF in the
memo must still be clustered index leaf tree pages. */
block = slot->object;
/* Sanity checks: same tablespace as the index, an index
page, and at level 0 of the tree. */
ut_a(buf_block_get_space(block)
== dict_index_get_space(index));
ut_a(fil_page_get_type(buf_block_get_frame(block))
== FIL_PAGE_INDEX);
ut_a(btr_page_get_level(buf_block_get_frame(block), mtr) == 0);
if (nonfree) {
/* Allocate the same page again. */
ulint page_no;
/* The block's own page number is passed as the hint,
and NULL as init_mtr so that the page is not
initialized; the allocation must return exactly
that page. */
page_no = btr_page_alloc_low(
index, buf_block_get_page_no(block),
FSP_NO_DIR, 0, mtr, NULL);
ut_a(page_no == buf_block_get_page_no(block));
} else {
/* Assert that the page is allocated and free it. */
btr_page_free_low(index, buf_block_get_frame(block),
0, mtr);
}
}
ut_ad(btr_freed_leaves_validate(mtr));
}
#ifdef UNIV_DEBUG
/**************************************************************//**
Validates all pages marked MTR_MEMO_FREE_CLUST_LEAF.
See btr_mark_freed_leaves().

Walks the memo of mtr from the last slot to the first and, for each
MTR_MEMO_FREE_CLUST_LEAF slot, checks that mtr->freed_clust_leaf is
set and that the block is an index page at level 0. Violations are
reported through the ut_a() checks; the return value is always TRUE,
which lets callers wrap the call in ut_ad(). */
ibool
btr_freed_leaves_validate(
/*======================*/
/* out: TRUE if valid */
mtr_t* mtr) /* in: mini-transaction */
{
ulint offset;
ut_ad(mtr->magic_n == MTR_MAGIC_N);
ut_ad(mtr->state == MTR_ACTIVE);
/* Start past the last memo slot and step backwards. */
offset = dyn_array_get_data_size(&mtr->memo);
while (offset > 0) {
mtr_memo_slot_t* slot;
buf_block_t* block;
offset -= sizeof *slot;
slot = dyn_array_get_element(&mtr->memo, offset);
/* Only freed-leaf markers are validated. */
if (slot->type != MTR_MEMO_FREE_CLUST_LEAF) {
continue;
}
/* A marker in the memo implies that the flag was set. */
ut_a(mtr->freed_clust_leaf);
/* Because btr_page_alloc() does invoke
mtr_memo_release on MTR_MEMO_FREE_CLUST_LEAF, all
blocks tagged with MTR_MEMO_FREE_CLUST_LEAF in the
memo must still be clustered index leaf tree pages. */
block = slot->object;
ut_a(fil_page_get_type(buf_block_get_frame(block))
== FIL_PAGE_INDEX);
ut_a(btr_page_get_level(buf_block_get_frame(block), mtr) == 0);
}
return(TRUE);
}
#endif /* UNIV_DEBUG */
/****************************************************************** /******************************************************************
Sets the child node file address in a node pointer. */ Sets the child node file address in a node pointer. */
UNIV_INLINE UNIV_INLINE
...@@ -1199,7 +1026,7 @@ btr_root_raise_and_insert( ...@@ -1199,7 +1026,7 @@ btr_root_raise_and_insert(
a node pointer to the new page, and then splitting the new page. */ a node pointer to the new page, and then splitting the new page. */
new_page = btr_page_alloc(index, 0, FSP_NO_DIR, new_page = btr_page_alloc(index, 0, FSP_NO_DIR,
btr_page_get_level(root, mtr), mtr, mtr); btr_page_get_level(root, mtr), mtr);
btr_page_create(new_page, index, mtr); btr_page_create(new_page, index, mtr);
...@@ -1820,7 +1647,7 @@ btr_page_split_and_insert( ...@@ -1820,7 +1647,7 @@ btr_page_split_and_insert(
/* 2. Allocate a new page to the index */ /* 2. Allocate a new page to the index */
new_page = btr_page_alloc(cursor->index, hint_page_no, direction, new_page = btr_page_alloc(cursor->index, hint_page_no, direction,
btr_page_get_level(page, mtr), mtr, mtr); btr_page_get_level(page, mtr), mtr);
btr_page_create(new_page, cursor->index, mtr); btr_page_create(new_page, cursor->index, mtr);
/* 3. Calculate the first record on the upper half-page, and the /* 3. Calculate the first record on the upper half-page, and the
......
...@@ -2051,6 +2051,43 @@ btr_cur_pessimistic_update( ...@@ -2051,6 +2051,43 @@ btr_cur_pessimistic_update(
return(err); return(err);
} }
/*****************************************************************
Commits and restarts a mini-transaction so that it will retain an
x-lock on index->lock and the cursor page.

Sequence: additional x-lock requests are made on index->lock and on
the lock of the block that holds the cursor record, and the block's
buffer-fix count is incremented, before mtr_commit(mtr). After
mtr_start(mtr), the same index lock and block are pushed onto the
memo of the restarted mini-transaction, so that they are released
when it is committed.

The caller must already hold, in mtr, an x-lock on index->lock and
an x-fix on the cursor page (checked by the ut_ad() assertions
below). */
void
btr_cur_mtr_commit_and_start(
/*=========================*/
btr_cur_t* cursor, /* in: cursor */
mtr_t* mtr) /* in/out: mini-transaction */
{
buf_block_t* block;
/* The block that contains the record the cursor is positioned on. */
block = buf_block_align(btr_cur_get_rec(cursor));
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index),
MTR_MEMO_X_LOCK));
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
/* Keep the locks across the mtr_commit(mtr). */
/* NOTE(review): the mtr already holds both locks (see the
assertions above), so these are presumably recursive x-lock
requests that remain held after mtr_commit() releases the
mtr's own references -- confirm against the rw_lock semantics. */
rw_lock_x_lock(dict_index_get_lock(cursor->index));
rw_lock_x_lock(&block->lock);
/* Increment the buffer-fix count of the block while holding
block->mutex; presumably this keeps the page fixed once
mtr_commit() drops the fix owned by the old memo entry. */
mutex_enter(&block->mutex);
#ifdef UNIV_SYNC_DEBUG
buf_block_buf_fix_inc_debug(block, __FILE__, __LINE__);
#else
buf_block_buf_fix_inc(block);
#endif
mutex_exit(&block->mutex);
/* Write out the redo log. */
mtr_commit(mtr);
mtr_start(mtr);
/* Reassociate the locks with the mini-transaction.
They will be released on mtr_commit(mtr). */
mtr_memo_push(mtr, dict_index_get_lock(cursor->index),
MTR_MEMO_X_LOCK);
mtr_memo_push(mtr, block, MTR_MEMO_PAGE_X_FIX);
}
/*==================== B-TREE DELETE MARK AND UNMARK ===============*/ /*==================== B-TREE DELETE MARK AND UNMARK ===============*/
/******************************************************************** /********************************************************************
...@@ -3449,11 +3486,6 @@ btr_store_big_rec_extern_fields( ...@@ -3449,11 +3486,6 @@ btr_store_big_rec_extern_fields(
this function returns */ this function returns */
big_rec_t* big_rec_vec, /* in: vector containing fields big_rec_t* big_rec_vec, /* in: vector containing fields
to be stored externally */ to be stored externally */
mtr_t* alloc_mtr, /* in/out: in an insert, NULL;
in an update, local_mtr for
allocating BLOB pages and
updating BLOB pointers; alloc_mtr
must not have freed any leaf pages */
mtr_t* local_mtr __attribute__((unused))) /* in: mtr mtr_t* local_mtr __attribute__((unused))) /* in: mtr
containing the latch to rec and to the containing the latch to rec and to the
tree */ tree */
...@@ -3474,8 +3506,6 @@ btr_store_big_rec_extern_fields( ...@@ -3474,8 +3506,6 @@ btr_store_big_rec_extern_fields(
ulint i; ulint i;
mtr_t mtr; mtr_t mtr;
ut_ad(local_mtr);
ut_ad(!alloc_mtr || alloc_mtr == local_mtr);
ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index), ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
MTR_MEMO_X_LOCK)); MTR_MEMO_X_LOCK));
...@@ -3485,25 +3515,6 @@ btr_store_big_rec_extern_fields( ...@@ -3485,25 +3515,6 @@ btr_store_big_rec_extern_fields(
space_id = buf_frame_get_space_id(rec); space_id = buf_frame_get_space_id(rec);
if (alloc_mtr) {
/* Because alloc_mtr will be committed after
mtr, it is possible that the tablespace has been
extended when the B-tree record was updated or
inserted, or it will be extended while allocating
pages for big_rec.
TODO: In mtr (not alloc_mtr), write a redo log record
about extending the tablespace to its current size,
and remember the current size. Whenever the tablespace
grows as pages are allocated, write further redo log
records to mtr. (Currently tablespace extension is not
covered by the redo log. If it were, the record would
only be written to alloc_mtr, which is committed after
mtr.) */
} else {
alloc_mtr = &mtr;
}
/* We have to create a file segment to the tablespace /* We have to create a file segment to the tablespace
for each field and put the pointer to the field in rec */ for each field and put the pointer to the field in rec */
...@@ -3530,7 +3541,7 @@ btr_store_big_rec_extern_fields( ...@@ -3530,7 +3541,7 @@ btr_store_big_rec_extern_fields(
} }
page = btr_page_alloc(index, hint_page_no, page = btr_page_alloc(index, hint_page_no,
FSP_NO_DIR, 0, alloc_mtr, &mtr); FSP_NO_DIR, 0, &mtr);
if (page == NULL) { if (page == NULL) {
mtr_commit(&mtr); mtr_commit(&mtr);
...@@ -3584,42 +3595,37 @@ btr_store_big_rec_extern_fields( ...@@ -3584,42 +3595,37 @@ btr_store_big_rec_extern_fields(
extern_len -= store_len; extern_len -= store_len;
if (alloc_mtr == &mtr) {
#ifdef UNIV_SYNC_DEBUG #ifdef UNIV_SYNC_DEBUG
rec_page = rec_page =
#endif /* UNIV_SYNC_DEBUG */ #endif /* UNIV_SYNC_DEBUG */
buf_page_get( buf_page_get(space_id,
space_id, buf_frame_get_page_no(data),
buf_frame_get_page_no(data), RW_X_LATCH, &mtr);
RW_X_LATCH, &mtr);
#ifdef UNIV_SYNC_DEBUG #ifdef UNIV_SYNC_DEBUG
buf_page_dbg_add_level( buf_page_dbg_add_level(rec_page, SYNC_NO_ORDER_CHECK);
rec_page, SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */ #endif /* UNIV_SYNC_DEBUG */
}
mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, 0, mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, 0,
MLOG_4BYTES, alloc_mtr); MLOG_4BYTES, &mtr);
mlog_write_ulint(data + local_len + BTR_EXTERN_LEN + 4, mlog_write_ulint(data + local_len + BTR_EXTERN_LEN + 4,
big_rec_vec->fields[i].len big_rec_vec->fields[i].len
- extern_len, - extern_len,
MLOG_4BYTES, alloc_mtr); MLOG_4BYTES, &mtr);
if (prev_page_no == FIL_NULL) { if (prev_page_no == FIL_NULL) {
mlog_write_ulint(data + local_len mlog_write_ulint(data + local_len
+ BTR_EXTERN_SPACE_ID, + BTR_EXTERN_SPACE_ID,
space_id, space_id,
MLOG_4BYTES, alloc_mtr); MLOG_4BYTES, &mtr);
mlog_write_ulint(data + local_len mlog_write_ulint(data + local_len
+ BTR_EXTERN_PAGE_NO, + BTR_EXTERN_PAGE_NO,
page_no, page_no,
MLOG_4BYTES, alloc_mtr); MLOG_4BYTES, &mtr);
mlog_write_ulint(data + local_len mlog_write_ulint(data + local_len
+ BTR_EXTERN_OFFSET, + BTR_EXTERN_OFFSET,
FIL_PAGE_DATA, FIL_PAGE_DATA,
MLOG_4BYTES, alloc_mtr); MLOG_4BYTES, &mtr);
/* Set the bit denoting that this field /* Set the bit denoting that this field
in rec is stored externally */ in rec is stored externally */
...@@ -3627,7 +3633,7 @@ btr_store_big_rec_extern_fields( ...@@ -3627,7 +3633,7 @@ btr_store_big_rec_extern_fields(
rec_set_nth_field_extern_bit( rec_set_nth_field_extern_bit(
rec, index, rec, index,
big_rec_vec->fields[i].field_no, big_rec_vec->fields[i].field_no,
TRUE, alloc_mtr); TRUE, &mtr);
} }
prev_page_no = page_no; prev_page_no = page_no;
......
This diff is collapsed.
...@@ -379,11 +379,7 @@ btr_page_alloc( ...@@ -379,11 +379,7 @@ btr_page_alloc(
page split is made */ page split is made */
ulint level, /* in: level where the page is placed ulint level, /* in: level where the page is placed
in the tree */ in the tree */
mtr_t* mtr, /* in/out: mini-transaction mtr_t* mtr); /* in: mtr */
for the allocation */
mtr_t* init_mtr); /* in/out: mini-transaction
for x-latching and initializing
the page */
/****************************************************************** /******************************************************************
Frees a file page used in an index tree. NOTE: cannot free field external Frees a file page used in an index tree. NOTE: cannot free field external
storage pages because the page must contain info on its level. */ storage pages because the page must contain info on its level. */
...@@ -406,31 +402,6 @@ btr_page_free_low( ...@@ -406,31 +402,6 @@ btr_page_free_low(
page_t* page, /* in: page to be freed, x-latched */ page_t* page, /* in: page to be freed, x-latched */
ulint level, /* in: page level */ ulint level, /* in: page level */
mtr_t* mtr); /* in: mtr */ mtr_t* mtr); /* in: mtr */
/**************************************************************//**
Marks all MTR_MEMO_FREE_CLUST_LEAF pages nonfree or free.
For invoking btr_store_big_rec_extern_fields() after an update,
we must temporarily mark freed clustered index pages allocated, so
that off-page columns will not be allocated from them. Between the
btr_store_big_rec_extern_fields() and mtr_commit() we have to
mark the pages free again, so that no pages will be leaked. */
void
btr_mark_freed_leaves(
/*==================*/
dict_index_t* index, /* in/out: clustered index */
mtr_t* mtr, /* in/out: mini-transaction */
ibool nonfree);/* in: TRUE=mark nonfree, FALSE=mark freed */
#ifdef UNIV_DEBUG
/**************************************************************//**
Validates all pages marked MTR_MEMO_FREE_CLUST_LEAF.
See btr_mark_freed_leaves(). */
ibool
btr_freed_leaves_validate(
/*======================*/
/* out: TRUE if valid */
mtr_t* mtr); /* in: mini-transaction */
#endif /* UNIV_DEBUG */
#ifdef UNIV_BTR_PRINT #ifdef UNIV_BTR_PRINT
/***************************************************************** /*****************************************************************
Prints size info of a B-tree. */ Prints size info of a B-tree. */
......
...@@ -252,6 +252,15 @@ btr_cur_pessimistic_update( ...@@ -252,6 +252,15 @@ btr_cur_pessimistic_update(
updates */ updates */
que_thr_t* thr, /* in: query thread */ que_thr_t* thr, /* in: query thread */
mtr_t* mtr); /* in: mtr */ mtr_t* mtr); /* in: mtr */
/*****************************************************************
Commits and restarts a mini-transaction so that it will retain an
x-lock on index->lock and the cursor page. */
void
btr_cur_mtr_commit_and_start(
/*=========================*/
btr_cur_t* cursor, /* in: cursor */
mtr_t* mtr); /* in/out: mini-transaction */
/*************************************************************** /***************************************************************
Marks a clustered index record deleted. Writes an undo log record to Marks a clustered index record deleted. Writes an undo log record to
undo log on this delete marking. Writes in the trx id field the id undo log on this delete marking. Writes in the trx id field the id
...@@ -462,11 +471,6 @@ btr_store_big_rec_extern_fields( ...@@ -462,11 +471,6 @@ btr_store_big_rec_extern_fields(
this function returns */ this function returns */
big_rec_t* big_rec_vec, /* in: vector containing fields big_rec_t* big_rec_vec, /* in: vector containing fields
to be stored externally */ to be stored externally */
mtr_t* alloc_mtr, /* in/out: in an insert, NULL;
in an update, local_mtr for
allocating BLOB pages and
updating BLOB pointers; alloc_mtr
must not have freed any leaf pages */
mtr_t* local_mtr); /* in: mtr containing the latch to mtr_t* local_mtr); /* in: mtr containing the latch to
rec and to the tree */ rec and to the tree */
/*********************************************************************** /***********************************************************************
......
...@@ -179,11 +179,7 @@ fseg_alloc_free_page_general( ...@@ -179,11 +179,7 @@ fseg_alloc_free_page_general(
with fsp_reserve_free_extents, then there with fsp_reserve_free_extents, then there
is no need to do the check for this individual is no need to do the check for this individual
page */ page */
mtr_t* mtr, /* in/out: mini-transaction */ mtr_t* mtr); /* in/out: mini-transaction */
mtr_t* init_mtr);/* in/out: mtr or another mini-transaction
in which the page should be initialized,
or NULL if this is a "fake allocation" of
a page that was previously freed in mtr */
/************************************************************************** /**************************************************************************
Reserves free pages from a tablespace. All mini-transactions which may Reserves free pages from a tablespace. All mini-transactions which may
use several pages from the tablespace should call this function beforehand use several pages from the tablespace should call this function beforehand
......
...@@ -36,8 +36,6 @@ first 3 values must be RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ ...@@ -36,8 +36,6 @@ first 3 values must be RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
#define MTR_MEMO_MODIFY 54 #define MTR_MEMO_MODIFY 54
#define MTR_MEMO_S_LOCK 55 #define MTR_MEMO_S_LOCK 55
#define MTR_MEMO_X_LOCK 56 #define MTR_MEMO_X_LOCK 56
/* The mini-transaction freed a clustered index leaf page. */
#define MTR_MEMO_FREE_CLUST_LEAF 57
/* Log item types: we have made them to be of the type 'byte' /* Log item types: we have made them to be of the type 'byte'
for the compiler to warn if val and type parameters are switched for the compiler to warn if val and type parameters are switched
...@@ -317,12 +315,9 @@ struct mtr_struct{ ...@@ -317,12 +315,9 @@ struct mtr_struct{
ulint state; /* MTR_ACTIVE, MTR_COMMITTING, MTR_COMMITTED */ ulint state; /* MTR_ACTIVE, MTR_COMMITTING, MTR_COMMITTED */
dyn_array_t memo; /* memo stack for locks etc. */ dyn_array_t memo; /* memo stack for locks etc. */
dyn_array_t log; /* mini-transaction log */ dyn_array_t log; /* mini-transaction log */
unsigned modifications:1; ibool modifications;
/* TRUE if the mtr made modifications to /* TRUE if the mtr made modifications to
buffer pool pages */ buffer pool pages */
unsigned freed_clust_leaf:1;
/* TRUE if MTR_MEMO_FREE_CLUST_LEAF
was logged in the mini-transaction */
ulint n_log_recs; ulint n_log_recs;
/* count of how many page initial log records /* count of how many page initial log records
have been written to the mtr log */ have been written to the mtr log */
......
...@@ -26,7 +26,6 @@ mtr_start( ...@@ -26,7 +26,6 @@ mtr_start(
mtr->log_mode = MTR_LOG_ALL; mtr->log_mode = MTR_LOG_ALL;
mtr->modifications = FALSE; mtr->modifications = FALSE;
mtr->freed_clust_leaf = FALSE;
mtr->n_log_recs = 0; mtr->n_log_recs = 0;
#ifdef UNIV_DEBUG #ifdef UNIV_DEBUG
...@@ -51,8 +50,7 @@ mtr_memo_push( ...@@ -51,8 +50,7 @@ mtr_memo_push(
ut_ad(object); ut_ad(object);
ut_ad(type >= MTR_MEMO_PAGE_S_FIX); ut_ad(type >= MTR_MEMO_PAGE_S_FIX);
ut_ad(type <= MTR_MEMO_FREE_CLUST_LEAF); ut_ad(type <= MTR_MEMO_X_LOCK);
ut_ad(type != MTR_MEMO_FREE_CLUST_LEAF || mtr->freed_clust_leaf);
ut_ad(mtr); ut_ad(mtr);
ut_ad(mtr->magic_n == MTR_MAGIC_N); ut_ad(mtr->magic_n == MTR_MAGIC_N);
......
...@@ -53,13 +53,17 @@ mtr_memo_slot_release( ...@@ -53,13 +53,17 @@ mtr_memo_slot_release(
buf_page_release((buf_block_t*)object, type, mtr); buf_page_release((buf_block_t*)object, type, mtr);
} else if (type == MTR_MEMO_S_LOCK) { } else if (type == MTR_MEMO_S_LOCK) {
rw_lock_s_unlock((rw_lock_t*)object); rw_lock_s_unlock((rw_lock_t*)object);
} else if (type != MTR_MEMO_X_LOCK) { #ifdef UNIV_DEBUG
ut_ad(type == MTR_MEMO_MODIFY } else if (type == MTR_MEMO_X_LOCK) {
|| type == MTR_MEMO_FREE_CLUST_LEAF); rw_lock_x_unlock((rw_lock_t*)object);
} else {
ut_ad(type == MTR_MEMO_MODIFY);
ut_ad(mtr_memo_contains(mtr, object, ut_ad(mtr_memo_contains(mtr, object,
MTR_MEMO_PAGE_X_FIX)); MTR_MEMO_PAGE_X_FIX));
#else
} else { } else {
rw_lock_x_unlock((rw_lock_t*)object); rw_lock_x_unlock((rw_lock_t*)object);
#endif
} }
} }
......
...@@ -2090,20 +2090,15 @@ row_ins_index_entry_low( ...@@ -2090,20 +2090,15 @@ row_ins_index_entry_low(
if (big_rec) { if (big_rec) {
ut_a(err == DB_SUCCESS); ut_a(err == DB_SUCCESS);
/* Write out the externally stored /* Write out the externally stored
columns, but allocate the pages and columns while still x-latching
write the pointers using the index->lock and block->lock. We have
mini-transaction of the record update. to mtr_commit(mtr) first, so that the
If any pages were freed in the update, redo log will be written in the
temporarily mark them allocated so correct order. Otherwise, we would run
that off-page columns will not into trouble on crash recovery if mtr
overwrite them. We must do this, freed B-tree pages on which some of
because we will write the redo log for the big_rec fields will be written. */
the BLOB writes before writing the btr_cur_mtr_commit_and_start(&cursor, &mtr);
redo log for the record update. Thus,
redo log application at crash recovery
will see BLOBs being written to free pages. */
btr_mark_freed_leaves(index, &mtr, TRUE);
rec = btr_cur_get_rec(&cursor); rec = btr_cur_get_rec(&cursor);
offsets = rec_get_offsets(rec, index, offsets, offsets = rec_get_offsets(rec, index, offsets,
...@@ -2111,8 +2106,7 @@ row_ins_index_entry_low( ...@@ -2111,8 +2106,7 @@ row_ins_index_entry_low(
&heap); &heap);
err = btr_store_big_rec_extern_fields( err = btr_store_big_rec_extern_fields(
index, rec, offsets, big_rec, index, rec, offsets, big_rec, &mtr);
&mtr, &mtr);
/* If writing big_rec fails (for /* If writing big_rec fails (for
example, because of DB_OUT_OF_FILE_SPACE), example, because of DB_OUT_OF_FILE_SPACE),
the record will be corrupted. Even if the record will be corrupted. Even if
...@@ -2125,9 +2119,6 @@ row_ins_index_entry_low( ...@@ -2125,9 +2119,6 @@ row_ins_index_entry_low(
undo log, and thus the record cannot undo log, and thus the record cannot
be rolled back. */ be rolled back. */
ut_a(err == DB_SUCCESS); ut_a(err == DB_SUCCESS);
/* Free the pages again
in order to avoid a leak. */
btr_mark_freed_leaves(index, &mtr, FALSE);
goto stored_big_rec; goto stored_big_rec;
} }
} else { } else {
...@@ -2175,8 +2166,7 @@ row_ins_index_entry_low( ...@@ -2175,8 +2166,7 @@ row_ins_index_entry_low(
ULINT_UNDEFINED, &heap); ULINT_UNDEFINED, &heap);
err = btr_store_big_rec_extern_fields(index, rec, err = btr_store_big_rec_extern_fields(index, rec,
offsets, big_rec, offsets, big_rec, &mtr);
NULL, &mtr);
stored_big_rec: stored_big_rec:
if (modify) { if (modify) {
dtuple_big_rec_free(big_rec); dtuple_big_rec_free(big_rec);
......
...@@ -212,27 +212,23 @@ row_build( ...@@ -212,27 +212,23 @@ row_build(
} }
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG #if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
if (rec_offs_any_null_extern(rec, offsets)) { /* This condition can occur during crash recovery before
/* This condition can occur during crash recovery trx_rollback_or_clean_all_without_sess() has completed
before trx_rollback_or_clean_all_without_sess() has execution.
completed execution.
This condition is possible if the server crashed
This condition is possible if the server crashed during an insert or update before
during an insert or update before btr_store_big_rec_extern_fields() did mtr_commit() all
btr_store_big_rec_extern_fields() did mtr_commit() all BLOB pointers to the clustered index record.
BLOB pointers to the clustered index record.
If the record contains a null BLOB pointer, look up the
If the record contains a null BLOB pointer, look up the transaction that holds the implicit lock on this record, and
transaction that holds the implicit lock on this record, and assert that it is active. (In this version of InnoDB, we
assert that it is active. (In this version of InnoDB, we cannot assert that it was recovered, because there is no
cannot assert that it was recovered, because there is no trx->is_recovered field.) */
trx->is_recovered field.) */
ut_a(!rec_offs_any_null_extern(rec, offsets)
ut_a(trx_assert_active( || trx_assert_active(row_get_rec_trx_id(rec, index, offsets)));
row_get_rec_trx_id(rec, index, offsets)));
ut_a(trx_undo_roll_ptr_is_insert(
row_get_rec_roll_ptr(rec, index, offsets)));
}
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ #endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
if (type != ROW_COPY_POINTERS) { if (type != ROW_COPY_POINTERS) {
......
...@@ -1591,22 +1591,21 @@ row_upd_clust_rec( ...@@ -1591,22 +1591,21 @@ row_upd_clust_rec(
*offsets_ = (sizeof offsets_) / sizeof *offsets_; *offsets_ = (sizeof offsets_) / sizeof *offsets_;
ut_a(err == DB_SUCCESS); ut_a(err == DB_SUCCESS);
/* Write out the externally stored columns, but /* Write out the externally stored columns while still
allocate the pages and write the pointers using the x-latching index->lock and block->lock. We have to
mini-transaction of the record update. If any pages mtr_commit(mtr) first, so that the redo log will be
were freed in the update, temporarily mark them written in the correct order. Otherwise, we would run
allocated so that off-page columns will not overwrite into trouble on crash recovery if mtr freed B-tree
them. We must do this, because we write the redo log pages on which some of the big_rec fields will be
for the BLOB writes before writing the redo log for written. */
the record update. */ btr_cur_mtr_commit_and_start(btr_cur, mtr);
btr_mark_freed_leaves(index, mtr, TRUE);
rec = btr_cur_get_rec(btr_cur); rec = btr_cur_get_rec(btr_cur);
err = btr_store_big_rec_extern_fields( err = btr_store_big_rec_extern_fields(
index, rec, index, rec,
rec_get_offsets(rec, index, offsets_, rec_get_offsets(rec, index, offsets_,
ULINT_UNDEFINED, &heap), ULINT_UNDEFINED, &heap),
big_rec, mtr, mtr); big_rec, mtr);
if (UNIV_LIKELY_NULL(heap)) { if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap); mem_heap_free(heap);
} }
...@@ -1619,8 +1618,6 @@ row_upd_clust_rec( ...@@ -1619,8 +1618,6 @@ row_upd_clust_rec(
to the undo log, and thus the record cannot be rolled to the undo log, and thus the record cannot be rolled
back. */ back. */
ut_a(err == DB_SUCCESS); ut_a(err == DB_SUCCESS);
/* Free the pages again in order to avoid a leak. */
btr_mark_freed_leaves(index, mtr, FALSE);
} }
mtr_commit(mtr); mtr_commit(mtr);
......
...@@ -864,7 +864,7 @@ trx_undo_add_page( ...@@ -864,7 +864,7 @@ trx_undo_add_page(
page_no = fseg_alloc_free_page_general(header_page + TRX_UNDO_SEG_HDR page_no = fseg_alloc_free_page_general(header_page + TRX_UNDO_SEG_HDR
+ TRX_UNDO_FSEG_HEADER, + TRX_UNDO_FSEG_HEADER,
undo->top_page_no + 1, FSP_UP, undo->top_page_no + 1, FSP_UP,
TRUE, mtr, mtr); TRUE, mtr);
fil_space_release_free_extents(undo->space, n_reserved); fil_space_release_free_extents(undo->space, n_reserved);
......
...@@ -50,15 +50,6 @@ ...@@ -50,15 +50,6 @@
* include/trx0undo.h, trx/trx0rec.c, trx/trx0undo.c: * include/trx0undo.h, trx/trx0rec.c, trx/trx0undo.c:
Fix Bug#12547647 UPDATE LOGGING COULD EXCEED LOG PAGE SIZE Fix Bug#12547647 UPDATE LOGGING COULD EXCEED LOG PAGE SIZE
2011-08-29 The InnoDB Team
* btr/btr0btr.c, btr/btr0cur.c, fsp/fsp0fsp.c,
include/btr0btr.h, include/btr0cur.h, include/fsp0fsp.h,
include/mtr0mtr.h, include/mtr0mtr.ic, mtr/mtr0mtr.c,
row/row0ins.c, row/row0row.c, row/row0upd.c, trx/trx0undo.c:
Fix Bug#12704861 Corruption after a crash during BLOB update
and other regressions from the fix of Bug#12612184
2011-08-15 The InnoDB Team 2011-08-15 The InnoDB Team
* btr/btr0btr.c, btr/btr0cur.c, btr/btr0pcur.c, btr/btr0sea.c, * btr/btr0btr.c, btr/btr0cur.c, btr/btr0pcur.c, btr/btr0sea.c,
......
...@@ -906,29 +906,28 @@ btr_page_alloc_for_ibuf( ...@@ -906,29 +906,28 @@ btr_page_alloc_for_ibuf(
/**************************************************************//** /**************************************************************//**
Allocates a new file page to be used in an index tree. NOTE: we assume Allocates a new file page to be used in an index tree. NOTE: we assume
that the caller has made the reservation for free extents! that the caller has made the reservation for free extents!
@return allocated page number, FIL_NULL if out of space */ @return new allocated block, x-latched; NULL if out of space */
static __attribute__((nonnull(1,5), warn_unused_result)) UNIV_INTERN
ulint buf_block_t*
btr_page_alloc_low( btr_page_alloc(
/*===============*/ /*===========*/
dict_index_t* index, /*!< in: index */ dict_index_t* index, /*!< in: index */
ulint hint_page_no, /*!< in: hint of a good page */ ulint hint_page_no, /*!< in: hint of a good page */
byte file_direction, /*!< in: direction where a possible byte file_direction, /*!< in: direction where a possible
page split is made */ page split is made */
ulint level, /*!< in: level where the page is placed ulint level, /*!< in: level where the page is placed
in the tree */ in the tree */
mtr_t* mtr, /*!< in/out: mini-transaction mtr_t* mtr) /*!< in: mtr */
for the allocation */
mtr_t* init_mtr) /*!< in/out: mini-transaction
in which the page should be
initialized (may be the same
as mtr), or NULL if it should
not be initialized (the page
at hint was previously freed
in mtr) */
{ {
fseg_header_t* seg_header; fseg_header_t* seg_header;
page_t* root; page_t* root;
buf_block_t* new_block;
ulint new_page_no;
if (dict_index_is_ibuf(index)) {
return(btr_page_alloc_for_ibuf(index, mtr));
}
root = btr_root_get(index, mtr); root = btr_root_get(index, mtr);
...@@ -942,42 +941,8 @@ btr_page_alloc_low( ...@@ -942,42 +941,8 @@ btr_page_alloc_low(
reservation for free extents, and thus we know that a page can reservation for free extents, and thus we know that a page can
be allocated: */ be allocated: */
return(fseg_alloc_free_page_general( new_page_no = fseg_alloc_free_page_general(seg_header, hint_page_no,
seg_header, hint_page_no, file_direction, file_direction, TRUE, mtr);
TRUE, mtr, init_mtr));
}
/**************************************************************//**
Allocates a new file page to be used in an index tree. NOTE: we assume
that the caller has made the reservation for free extents!
@return new allocated block, x-latched; NULL if out of space */
UNIV_INTERN
buf_block_t*
btr_page_alloc(
/*===========*/
dict_index_t* index, /*!< in: index */
ulint hint_page_no, /*!< in: hint of a good page */
byte file_direction, /*!< in: direction where a possible
page split is made */
ulint level, /*!< in: level where the page is placed
in the tree */
mtr_t* mtr, /*!< in/out: mini-transaction
for the allocation */
mtr_t* init_mtr) /*!< in/out: mini-transaction
for x-latching and initializing
the page */
{
buf_block_t* new_block;
ulint new_page_no;
if (dict_index_is_ibuf(index)) {
return(btr_page_alloc_for_ibuf(index, mtr));
}
new_page_no = btr_page_alloc_low(
index, hint_page_no, file_direction, level, mtr, init_mtr);
if (new_page_no == FIL_NULL) { if (new_page_no == FIL_NULL) {
return(NULL); return(NULL);
...@@ -985,16 +950,9 @@ btr_page_alloc( ...@@ -985,16 +950,9 @@ btr_page_alloc(
new_block = buf_page_get(dict_index_get_space(index), new_block = buf_page_get(dict_index_get_space(index),
dict_table_zip_size(index->table), dict_table_zip_size(index->table),
new_page_no, RW_X_LATCH, init_mtr); new_page_no, RW_X_LATCH, mtr);
buf_block_dbg_add_level(new_block, SYNC_TREE_NODE_NEW); buf_block_dbg_add_level(new_block, SYNC_TREE_NODE_NEW);
if (mtr->freed_clust_leaf) {
mtr_memo_release(mtr, new_block, MTR_MEMO_FREE_CLUST_LEAF);
ut_ad(!mtr_memo_contains(mtr, new_block,
MTR_MEMO_FREE_CLUST_LEAF));
}
ut_ad(btr_freed_leaves_validate(mtr));
return(new_block); return(new_block);
} }
...@@ -1129,139 +1087,12 @@ btr_page_free( ...@@ -1129,139 +1087,12 @@ btr_page_free(
buf_block_t* block, /*!< in: block to be freed, x-latched */ buf_block_t* block, /*!< in: block to be freed, x-latched */
mtr_t* mtr) /*!< in: mtr */ mtr_t* mtr) /*!< in: mtr */
{ {
const page_t* page = buf_block_get_frame(block); ulint level;
ulint level = btr_page_get_level(page, mtr);
ut_ad(fil_page_get_type(block->frame) == FIL_PAGE_INDEX);
btr_page_free_low(index, block, level, mtr);
/* The handling of MTR_MEMO_FREE_CLUST_LEAF assumes this. */
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
if (level == 0 && dict_index_is_clust(index)) {
/* We may have to call btr_mark_freed_leaves() to
temporarily mark the block nonfree for invoking
btr_store_big_rec_extern_fields_func() after an
update. Remember that the block was freed. */
mtr->freed_clust_leaf = TRUE;
mtr_memo_push(mtr, block, MTR_MEMO_FREE_CLUST_LEAF);
}
ut_ad(btr_freed_leaves_validate(mtr));
}
/**************************************************************//**
Marks all MTR_MEMO_FREE_CLUST_LEAF pages nonfree or free.
For invoking btr_store_big_rec_extern_fields() after an update,
we must temporarily mark freed clustered index pages allocated, so
that off-page columns will not be allocated from them. Between the
btr_store_big_rec_extern_fields() and mtr_commit() we have to
mark the pages free again, so that no pages will be leaked. */
UNIV_INTERN
void
btr_mark_freed_leaves(
/*==================*/
dict_index_t* index, /*!< in/out: clustered index */
mtr_t* mtr, /*!< in/out: mini-transaction */
ibool nonfree)/*!< in: TRUE=mark nonfree, FALSE=mark freed */
{
/* This is loosely based on mtr_memo_release(). */
ulint offset;
ut_ad(dict_index_is_clust(index));
ut_ad(mtr->magic_n == MTR_MAGIC_N);
ut_ad(mtr->state == MTR_ACTIVE);
if (!mtr->freed_clust_leaf) {
return;
}
offset = dyn_array_get_data_size(&mtr->memo);
while (offset > 0) {
mtr_memo_slot_t* slot;
buf_block_t* block;
offset -= sizeof *slot;
slot = dyn_array_get_element(&mtr->memo, offset);
if (slot->type != MTR_MEMO_FREE_CLUST_LEAF) {
continue;
}
/* Because btr_page_alloc() does invoke
mtr_memo_release on MTR_MEMO_FREE_CLUST_LEAF, all
blocks tagged with MTR_MEMO_FREE_CLUST_LEAF in the
memo must still be clustered index leaf tree pages. */
block = slot->object;
ut_a(buf_block_get_space(block)
== dict_index_get_space(index));
ut_a(fil_page_get_type(buf_block_get_frame(block))
== FIL_PAGE_INDEX);
ut_a(page_is_leaf(buf_block_get_frame(block)));
if (nonfree) {
/* Allocate the same page again. */
ulint page_no;
page_no = btr_page_alloc_low(
index, buf_block_get_page_no(block),
FSP_NO_DIR, 0, mtr, NULL);
ut_a(page_no == buf_block_get_page_no(block));
} else {
/* Assert that the page is allocated and free it. */
btr_page_free_low(index, block, 0, mtr);
}
}
ut_ad(btr_freed_leaves_validate(mtr));
}
#ifdef UNIV_DEBUG
/**************************************************************//**
Validates all pages marked MTR_MEMO_FREE_CLUST_LEAF.
@see btr_mark_freed_leaves()
@return TRUE */
UNIV_INTERN
ibool
btr_freed_leaves_validate(
/*======================*/
mtr_t* mtr) /*!< in: mini-transaction */
{
ulint offset;
ut_ad(mtr->magic_n == MTR_MAGIC_N);
ut_ad(mtr->state == MTR_ACTIVE);
offset = dyn_array_get_data_size(&mtr->memo);
while (offset > 0) {
const mtr_memo_slot_t* slot;
const buf_block_t* block;
offset -= sizeof *slot;
slot = dyn_array_get_element(&mtr->memo, offset);
if (slot->type != MTR_MEMO_FREE_CLUST_LEAF) {
continue;
}
ut_a(mtr->freed_clust_leaf); level = btr_page_get_level(buf_block_get_frame(block), mtr);
/* Because btr_page_alloc() does invoke
mtr_memo_release on MTR_MEMO_FREE_CLUST_LEAF, all
blocks tagged with MTR_MEMO_FREE_CLUST_LEAF in the
memo must still be clustered index leaf tree pages. */
block = slot->object;
ut_a(fil_page_get_type(buf_block_get_frame(block))
== FIL_PAGE_INDEX);
ut_a(page_is_leaf(buf_block_get_frame(block)));
}
return(TRUE); btr_page_free_low(index, block, level, mtr);
} }
#endif /* UNIV_DEBUG */
/**************************************************************//** /**************************************************************//**
Sets the child node file address in a node pointer. */ Sets the child node file address in a node pointer. */
...@@ -1984,7 +1815,7 @@ btr_root_raise_and_insert( ...@@ -1984,7 +1815,7 @@ btr_root_raise_and_insert(
level = btr_page_get_level(root, mtr); level = btr_page_get_level(root, mtr);
new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, mtr, mtr); new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, mtr);
new_page = buf_block_get_frame(new_block); new_page = buf_block_get_frame(new_block);
new_page_zip = buf_block_get_page_zip(new_block); new_page_zip = buf_block_get_page_zip(new_block);
ut_a(!new_page_zip == !root_page_zip); ut_a(!new_page_zip == !root_page_zip);
...@@ -2720,7 +2551,7 @@ btr_page_split_and_insert( ...@@ -2720,7 +2551,7 @@ btr_page_split_and_insert(
/* 2. Allocate a new page to the index */ /* 2. Allocate a new page to the index */
new_block = btr_page_alloc(cursor->index, hint_page_no, direction, new_block = btr_page_alloc(cursor->index, hint_page_no, direction,
btr_page_get_level(page, mtr), mtr, mtr); btr_page_get_level(page, mtr), mtr);
new_page = buf_block_get_frame(new_block); new_page = buf_block_get_frame(new_block);
new_page_zip = buf_block_get_page_zip(new_block); new_page_zip = buf_block_get_page_zip(new_block);
btr_page_create(new_block, new_page_zip, cursor->index, btr_page_create(new_block, new_page_zip, cursor->index,
......
...@@ -2421,6 +2421,39 @@ btr_cur_pessimistic_update( ...@@ -2421,6 +2421,39 @@ btr_cur_pessimistic_update(
return(err); return(err);
} }
/**************************************************************//**
Commits and restarts a mini-transaction so that it will retain an
x-lock on index->lock and the cursor page. */
UNIV_INTERN
void
btr_cur_mtr_commit_and_start(
/*=========================*/
btr_cur_t* cursor, /*!< in: cursor */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
buf_block_t* block;
block = btr_cur_get_block(cursor);
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index),
MTR_MEMO_X_LOCK));
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
/* Keep the locks across the mtr_commit(mtr). */
rw_lock_x_lock(dict_index_get_lock(cursor->index));
rw_lock_x_lock(&block->lock);
mutex_enter(&block->mutex);
buf_block_buf_fix_inc(block, __FILE__, __LINE__);
mutex_exit(&block->mutex);
/* Write out the redo log. */
mtr_commit(mtr);
mtr_start(mtr);
/* Reassociate the locks with the mini-transaction.
They will be released on mtr_commit(mtr). */
mtr_memo_push(mtr, dict_index_get_lock(cursor->index),
MTR_MEMO_X_LOCK);
mtr_memo_push(mtr, block, MTR_MEMO_PAGE_X_FIX);
}
/*==================== B-TREE DELETE MARK AND UNMARK ===============*/ /*==================== B-TREE DELETE MARK AND UNMARK ===============*/
/****************************************************************//** /****************************************************************//**
...@@ -3863,9 +3896,6 @@ btr_store_big_rec_extern_fields_func( ...@@ -3863,9 +3896,6 @@ btr_store_big_rec_extern_fields_func(
the "external storage" flags in offsets the "external storage" flags in offsets
will not correspond to rec when will not correspond to rec when
this function returns */ this function returns */
const big_rec_t*big_rec_vec, /*!< in: vector containing fields
to be stored externally */
#ifdef UNIV_DEBUG #ifdef UNIV_DEBUG
mtr_t* local_mtr, /*!< in: mtr containing the mtr_t* local_mtr, /*!< in: mtr containing the
latch to rec and to the tree */ latch to rec and to the tree */
...@@ -3874,11 +3904,9 @@ btr_store_big_rec_extern_fields_func( ...@@ -3874,11 +3904,9 @@ btr_store_big_rec_extern_fields_func(
ibool update_in_place,/*! in: TRUE if the record is updated ibool update_in_place,/*! in: TRUE if the record is updated
in place (not delete+insert) */ in place (not delete+insert) */
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ #endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
mtr_t* alloc_mtr) /*!< in/out: in an insert, NULL; const big_rec_t*big_rec_vec) /*!< in: vector containing fields
in an update, local_mtr for to be stored externally */
allocating BLOB pages and
updating BLOB pointers; alloc_mtr
must not have freed any leaf pages */
{ {
ulint rec_page_no; ulint rec_page_no;
byte* field_ref; byte* field_ref;
...@@ -3897,9 +3925,6 @@ btr_store_big_rec_extern_fields_func( ...@@ -3897,9 +3925,6 @@ btr_store_big_rec_extern_fields_func(
ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(rec_offs_any_extern(offsets)); ut_ad(rec_offs_any_extern(offsets));
ut_ad(local_mtr);
ut_ad(!alloc_mtr || alloc_mtr == local_mtr);
ut_ad(!update_in_place || alloc_mtr);
ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index), ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
MTR_MEMO_X_LOCK)); MTR_MEMO_X_LOCK));
ut_ad(mtr_memo_contains(local_mtr, rec_block, MTR_MEMO_PAGE_X_FIX)); ut_ad(mtr_memo_contains(local_mtr, rec_block, MTR_MEMO_PAGE_X_FIX));
...@@ -3915,25 +3940,6 @@ btr_store_big_rec_extern_fields_func( ...@@ -3915,25 +3940,6 @@ btr_store_big_rec_extern_fields_func(
rec_page_no = buf_block_get_page_no(rec_block); rec_page_no = buf_block_get_page_no(rec_block);
ut_a(fil_page_get_type(page_align(rec)) == FIL_PAGE_INDEX); ut_a(fil_page_get_type(page_align(rec)) == FIL_PAGE_INDEX);
if (alloc_mtr) {
/* Because alloc_mtr will be committed after
mtr, it is possible that the tablespace has been
extended when the B-tree record was updated or
inserted, or it will be extended while allocating
pages for big_rec.
TODO: In mtr (not alloc_mtr), write a redo log record
about extending the tablespace to its current size,
and remember the current size. Whenever the tablespace
grows as pages are allocated, write further redo log
records to mtr. (Currently tablespace extension is not
covered by the redo log. If it were, the record would
only be written to alloc_mtr, which is committed after
mtr.) */
} else {
alloc_mtr = &mtr;
}
if (UNIV_LIKELY_NULL(page_zip)) { if (UNIV_LIKELY_NULL(page_zip)) {
int err; int err;
...@@ -4010,7 +4016,7 @@ btr_store_big_rec_extern_fields_func( ...@@ -4010,7 +4016,7 @@ btr_store_big_rec_extern_fields_func(
} }
block = btr_page_alloc(index, hint_page_no, block = btr_page_alloc(index, hint_page_no,
FSP_NO_DIR, 0, alloc_mtr, &mtr); FSP_NO_DIR, 0, &mtr);
if (UNIV_UNLIKELY(block == NULL)) { if (UNIV_UNLIKELY(block == NULL)) {
mtr_commit(&mtr); mtr_commit(&mtr);
...@@ -4137,15 +4143,11 @@ btr_store_big_rec_extern_fields_func( ...@@ -4137,15 +4143,11 @@ btr_store_big_rec_extern_fields_func(
goto next_zip_page; goto next_zip_page;
} }
if (alloc_mtr == &mtr) { rec_block = buf_page_get(space_id, zip_size,
rec_block = buf_page_get( rec_page_no,
space_id, zip_size, RW_X_LATCH, &mtr);
rec_page_no, buf_block_dbg_add_level(rec_block,
RW_X_LATCH, &mtr); SYNC_NO_ORDER_CHECK);
buf_block_dbg_add_level(
rec_block,
SYNC_NO_ORDER_CHECK);
}
if (err == Z_STREAM_END) { if (err == Z_STREAM_END) {
mach_write_to_4(field_ref mach_write_to_4(field_ref
...@@ -4179,8 +4181,7 @@ btr_store_big_rec_extern_fields_func( ...@@ -4179,8 +4181,7 @@ btr_store_big_rec_extern_fields_func(
page_zip_write_blob_ptr( page_zip_write_blob_ptr(
page_zip, rec, index, offsets, page_zip, rec, index, offsets,
big_rec_vec->fields[i].field_no, big_rec_vec->fields[i].field_no, &mtr);
alloc_mtr);
next_zip_page: next_zip_page:
prev_page_no = page_no; prev_page_no = page_no;
...@@ -4225,23 +4226,19 @@ btr_store_big_rec_extern_fields_func( ...@@ -4225,23 +4226,19 @@ btr_store_big_rec_extern_fields_func(
extern_len -= store_len; extern_len -= store_len;
if (alloc_mtr == &mtr) { rec_block = buf_page_get(space_id, zip_size,
rec_block = buf_page_get( rec_page_no,
space_id, zip_size, RW_X_LATCH, &mtr);
rec_page_no, buf_block_dbg_add_level(rec_block,
RW_X_LATCH, &mtr); SYNC_NO_ORDER_CHECK);
buf_block_dbg_add_level(
rec_block,
SYNC_NO_ORDER_CHECK);
}
mlog_write_ulint(field_ref + BTR_EXTERN_LEN, 0, mlog_write_ulint(field_ref + BTR_EXTERN_LEN, 0,
MLOG_4BYTES, alloc_mtr); MLOG_4BYTES, &mtr);
mlog_write_ulint(field_ref mlog_write_ulint(field_ref
+ BTR_EXTERN_LEN + 4, + BTR_EXTERN_LEN + 4,
big_rec_vec->fields[i].len big_rec_vec->fields[i].len
- extern_len, - extern_len,
MLOG_4BYTES, alloc_mtr); MLOG_4BYTES, &mtr);
if (prev_page_no == FIL_NULL) { if (prev_page_no == FIL_NULL) {
btr_blob_dbg_add_blob( btr_blob_dbg_add_blob(
...@@ -4251,19 +4248,18 @@ btr_store_big_rec_extern_fields_func( ...@@ -4251,19 +4248,18 @@ btr_store_big_rec_extern_fields_func(
mlog_write_ulint(field_ref mlog_write_ulint(field_ref
+ BTR_EXTERN_SPACE_ID, + BTR_EXTERN_SPACE_ID,
space_id, MLOG_4BYTES, space_id,
alloc_mtr); MLOG_4BYTES, &mtr);
mlog_write_ulint(field_ref mlog_write_ulint(field_ref
+ BTR_EXTERN_PAGE_NO, + BTR_EXTERN_PAGE_NO,
page_no, MLOG_4BYTES, page_no,
alloc_mtr); MLOG_4BYTES, &mtr);
mlog_write_ulint(field_ref mlog_write_ulint(field_ref
+ BTR_EXTERN_OFFSET, + BTR_EXTERN_OFFSET,
FIL_PAGE_DATA, FIL_PAGE_DATA,
MLOG_4BYTES, MLOG_4BYTES, &mtr);
alloc_mtr);
} }
prev_page_no = page_no; prev_page_no = page_no;
......
This diff is collapsed.
...@@ -557,12 +557,7 @@ btr_page_alloc( ...@@ -557,12 +557,7 @@ btr_page_alloc(
page split is made */ page split is made */
ulint level, /*!< in: level where the page is placed ulint level, /*!< in: level where the page is placed
in the tree */ in the tree */
mtr_t* mtr, /*!< in/out: mini-transaction mtr_t* mtr); /*!< in: mtr */
for the allocation */
mtr_t* init_mtr) /*!< in/out: mini-transaction
for x-latching and initializing
the page */
__attribute__((nonnull, warn_unused_result));
/**************************************************************//** /**************************************************************//**
Frees a file page used in an index tree. NOTE: cannot free field external Frees a file page used in an index tree. NOTE: cannot free field external
storage pages because the page must contain info on its level. */ storage pages because the page must contain info on its level. */
...@@ -585,33 +580,6 @@ btr_page_free_low( ...@@ -585,33 +580,6 @@ btr_page_free_low(
buf_block_t* block, /*!< in: block to be freed, x-latched */ buf_block_t* block, /*!< in: block to be freed, x-latched */
ulint level, /*!< in: page level */ ulint level, /*!< in: page level */
mtr_t* mtr); /*!< in: mtr */ mtr_t* mtr); /*!< in: mtr */
/**************************************************************//**
Marks all MTR_MEMO_FREE_CLUST_LEAF pages nonfree or free.
For invoking btr_store_big_rec_extern_fields() after an update,
we must temporarily mark freed clustered index pages allocated, so
that off-page columns will not be allocated from them. Between the
btr_store_big_rec_extern_fields() and mtr_commit() we have to
mark the pages free again, so that no pages will be leaked. */
UNIV_INTERN
void
btr_mark_freed_leaves(
/*==================*/
dict_index_t* index, /*!< in/out: clustered index */
mtr_t* mtr, /*!< in/out: mini-transaction */
ibool nonfree)/*!< in: TRUE=mark nonfree, FALSE=mark freed */
__attribute__((nonnull));
#ifdef UNIV_DEBUG
/**************************************************************//**
Validates all pages marked MTR_MEMO_FREE_CLUST_LEAF.
@see btr_mark_freed_leaves()
@return TRUE */
UNIV_INTERN
ibool
btr_freed_leaves_validate(
/*======================*/
mtr_t* mtr) /*!< in: mini-transaction */
__attribute__((nonnull, warn_unused_result));
#endif /* UNIV_DEBUG */
#ifdef UNIV_BTR_PRINT #ifdef UNIV_BTR_PRINT
/*************************************************************//** /*************************************************************//**
Prints size info of a B-tree. */ Prints size info of a B-tree. */
......
...@@ -326,6 +326,16 @@ btr_cur_pessimistic_update( ...@@ -326,6 +326,16 @@ btr_cur_pessimistic_update(
que_thr_t* thr, /*!< in: query thread */ que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr); /*!< in: mtr; must be committed before mtr_t* mtr); /*!< in: mtr; must be committed before
latching any further pages */ latching any further pages */
/*****************************************************************
Commits and restarts a mini-transaction so that it will retain an
x-lock on index->lock and the cursor page. */
UNIV_INTERN
void
btr_cur_mtr_commit_and_start(
/*=========================*/
btr_cur_t* cursor, /*!< in: cursor */
mtr_t* mtr) /*!< in/out: mini-transaction */
__attribute__((nonnull));
/***********************************************************//** /***********************************************************//**
Marks a clustered index record deleted. Writes an undo log record to Marks a clustered index record deleted. Writes an undo log record to
undo log on this delete marking. Writes in the trx id field the id undo log on this delete marking. Writes in the trx id field the id
...@@ -530,8 +540,6 @@ btr_store_big_rec_extern_fields_func( ...@@ -530,8 +540,6 @@ btr_store_big_rec_extern_fields_func(
the "external storage" flags in offsets the "external storage" flags in offsets
will not correspond to rec when will not correspond to rec when
this function returns */ this function returns */
const big_rec_t*big_rec_vec, /*!< in: vector containing fields
to be stored externally */
#ifdef UNIV_DEBUG #ifdef UNIV_DEBUG
mtr_t* local_mtr, /*!< in: mtr containing the mtr_t* local_mtr, /*!< in: mtr containing the
latch to rec and to the tree */ latch to rec and to the tree */
...@@ -540,12 +548,9 @@ btr_store_big_rec_extern_fields_func( ...@@ -540,12 +548,9 @@ btr_store_big_rec_extern_fields_func(
ibool update_in_place,/*! in: TRUE if the record is updated ibool update_in_place,/*! in: TRUE if the record is updated
in place (not delete+insert) */ in place (not delete+insert) */
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ #endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
mtr_t* alloc_mtr) /*!< in/out: in an insert, NULL; const big_rec_t*big_rec_vec) /*!< in: vector containing fields
in an update, local_mtr for to be stored externally */
allocating BLOB pages and __attribute__((nonnull));
updating BLOB pointers; alloc_mtr
must not have freed any leaf pages */
__attribute__((nonnull(1,2,3,4,5), warn_unused_result));
/** Stores the fields in big_rec_vec to the tablespace and puts pointers to /** Stores the fields in big_rec_vec to the tablespace and puts pointers to
them in rec. The extern flags in rec will have to be set beforehand. them in rec. The extern flags in rec will have to be set beforehand.
...@@ -554,22 +559,21 @@ file segment of the index tree. ...@@ -554,22 +559,21 @@ file segment of the index tree.
@param index in: clustered index; MUST be X-latched by mtr @param index in: clustered index; MUST be X-latched by mtr
@param b in/out: block containing rec; MUST be X-latched by mtr @param b in/out: block containing rec; MUST be X-latched by mtr
@param rec in/out: clustered index record @param rec in/out: clustered index record
@param offs in: rec_get_offsets(rec, index); @param offsets in: rec_get_offsets(rec, index);
the "external storage" flags in offsets will not be adjusted the "external storage" flags in offsets will not be adjusted
@param big in: vector containing fields to be stored externally
@param mtr in: mini-transaction that holds x-latch on index and b @param mtr in: mini-transaction that holds x-latch on index and b
@param upd in: TRUE if the record is updated in place (not delete+insert) @param upd in: TRUE if the record is updated in place (not delete+insert)
@param rmtr in/out: in updates, the mini-transaction that holds rec @param big in: vector containing fields to be stored externally
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
#ifdef UNIV_DEBUG #ifdef UNIV_DEBUG
# define btr_store_big_rec_extern_fields(index,b,rec,offs,big,mtr,upd,rmtr) \ # define btr_store_big_rec_extern_fields(index,b,rec,offsets,mtr,upd,big) \
btr_store_big_rec_extern_fields_func(index,b,rec,offs,big,mtr,upd,rmtr) btr_store_big_rec_extern_fields_func(index,b,rec,offsets,mtr,upd,big)
#elif defined UNIV_BLOB_LIGHT_DEBUG #elif defined UNIV_BLOB_LIGHT_DEBUG
# define btr_store_big_rec_extern_fields(index,b,rec,offs,big,mtr,upd,rmtr) \ # define btr_store_big_rec_extern_fields(index,b,rec,offsets,mtr,upd,big) \
btr_store_big_rec_extern_fields_func(index,b,rec,offs,big,upd,rmtr) btr_store_big_rec_extern_fields_func(index,b,rec,offsets,upd,big)
#else #else
# define btr_store_big_rec_extern_fields(index,b,rec,offs,big,mtr,upd,rmtr) \ # define btr_store_big_rec_extern_fields(index,b,rec,offsets,mtr,upd,big) \
btr_store_big_rec_extern_fields_func(index,b,rec,offs,big,rmtr) btr_store_big_rec_extern_fields_func(index,b,rec,offsets,big)
#endif #endif
/*******************************************************************//** /*******************************************************************//**
......
...@@ -176,18 +176,19 @@ fseg_n_reserved_pages( ...@@ -176,18 +176,19 @@ fseg_n_reserved_pages(
Allocates a single free page from a segment. This function implements Allocates a single free page from a segment. This function implements
the intelligent allocation strategy which tries to minimize the intelligent allocation strategy which tries to minimize
file space fragmentation. file space fragmentation.
@param[in/out] seg_header segment header @return the allocated page offset FIL_NULL if no page could be allocated */
@param[in] hint hint of which page would be desirable UNIV_INTERN
@param[in] direction if the new page is needed because ulint
fseg_alloc_free_page(
/*=================*/
fseg_header_t* seg_header, /*!< in: segment header */
ulint hint, /*!< in: hint of which page would be desirable */
byte direction, /*!< in: if the new page is needed because
of an index page split, and records are of an index page split, and records are
inserted there in order, into which inserted there in order, into which
direction they go alphabetically: FSP_DOWN, direction they go alphabetically: FSP_DOWN,
FSP_UP, FSP_NO_DIR FSP_UP, FSP_NO_DIR */
@param[in/out] mtr mini-transaction mtr_t* mtr); /*!< in: mtr handle */
@return the allocated page offset FIL_NULL if no page could be allocated */
#define fseg_alloc_free_page(seg_header, hint, direction, mtr) \
fseg_alloc_free_page_general(seg_header, hint, direction, \
FALSE, mtr, mtr)
/**********************************************************************//** /**********************************************************************//**
Allocates a single free page from a segment. This function implements Allocates a single free page from a segment. This function implements
the intelligent allocation strategy which tries to minimize file space the intelligent allocation strategy which tries to minimize file space
...@@ -209,11 +210,7 @@ fseg_alloc_free_page_general( ...@@ -209,11 +210,7 @@ fseg_alloc_free_page_general(
with fsp_reserve_free_extents, then there with fsp_reserve_free_extents, then there
is no need to do the check for this individual is no need to do the check for this individual
page */ page */
mtr_t* mtr, /*!< in/out: mini-transaction */ mtr_t* mtr) /*!< in/out: mini-transaction */
mtr_t* init_mtr)/*!< in/out: mtr or another mini-transaction
in which the page should be initialized,
or NULL if this is a "fake allocation" of
a page that was previously freed in mtr */
__attribute__((warn_unused_result, nonnull(1,5))); __attribute__((warn_unused_result, nonnull(1,5)));
/**********************************************************************//** /**********************************************************************//**
Reserves free pages from a tablespace. All mini-transactions which may Reserves free pages from a tablespace. All mini-transactions which may
......
...@@ -53,8 +53,6 @@ first 3 values must be RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ ...@@ -53,8 +53,6 @@ first 3 values must be RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
#define MTR_MEMO_MODIFY 54 #define MTR_MEMO_MODIFY 54
#define MTR_MEMO_S_LOCK 55 #define MTR_MEMO_S_LOCK 55
#define MTR_MEMO_X_LOCK 56 #define MTR_MEMO_X_LOCK 56
/** The mini-transaction freed a clustered index leaf page. */
#define MTR_MEMO_FREE_CLUST_LEAF 57
/** @name Log item types /** @name Log item types
The log items are declared 'byte' so that the compiler can warn if val The log items are declared 'byte' so that the compiler can warn if val
...@@ -379,12 +377,9 @@ struct mtr_struct{ ...@@ -379,12 +377,9 @@ struct mtr_struct{
#endif #endif
dyn_array_t memo; /*!< memo stack for locks etc. */ dyn_array_t memo; /*!< memo stack for locks etc. */
dyn_array_t log; /*!< mini-transaction log */ dyn_array_t log; /*!< mini-transaction log */
unsigned modifications:1; ibool modifications;
/*!< TRUE if the mini-transaction /* TRUE if the mtr made modifications to
modified buffer pool pages */ buffer pool pages */
unsigned freed_clust_leaf:1;
/*!< TRUE if MTR_MEMO_FREE_CLUST_LEAF
was logged in the mini-transaction */
ulint n_log_recs; ulint n_log_recs;
/* count of how many page initial log records /* count of how many page initial log records
have been written to the mtr log */ have been written to the mtr log */
......
...@@ -44,7 +44,6 @@ mtr_start( ...@@ -44,7 +44,6 @@ mtr_start(
mtr->log_mode = MTR_LOG_ALL; mtr->log_mode = MTR_LOG_ALL;
mtr->modifications = FALSE; mtr->modifications = FALSE;
mtr->freed_clust_leaf = FALSE;
mtr->n_log_recs = 0; mtr->n_log_recs = 0;
ut_d(mtr->state = MTR_ACTIVE); ut_d(mtr->state = MTR_ACTIVE);
...@@ -68,8 +67,7 @@ mtr_memo_push( ...@@ -68,8 +67,7 @@ mtr_memo_push(
ut_ad(object); ut_ad(object);
ut_ad(type >= MTR_MEMO_PAGE_S_FIX); ut_ad(type >= MTR_MEMO_PAGE_S_FIX);
ut_ad(type <= MTR_MEMO_FREE_CLUST_LEAF); ut_ad(type <= MTR_MEMO_X_LOCK);
ut_ad(type != MTR_MEMO_FREE_CLUST_LEAF || mtr->freed_clust_leaf);
ut_ad(mtr); ut_ad(mtr);
ut_ad(mtr->magic_n == MTR_MAGIC_N); ut_ad(mtr->magic_n == MTR_MAGIC_N);
ut_ad(mtr->state == MTR_ACTIVE); ut_ad(mtr->state == MTR_ACTIVE);
......
...@@ -58,11 +58,12 @@ mtr_memo_slot_release( ...@@ -58,11 +58,12 @@ mtr_memo_slot_release(
buf_page_release((buf_block_t*)object, type, mtr); buf_page_release((buf_block_t*)object, type, mtr);
} else if (type == MTR_MEMO_S_LOCK) { } else if (type == MTR_MEMO_S_LOCK) {
rw_lock_s_unlock((rw_lock_t*)object); rw_lock_s_unlock((rw_lock_t*)object);
#ifdef UNIV_DEBUG
} else if (type != MTR_MEMO_X_LOCK) { } else if (type != MTR_MEMO_X_LOCK) {
ut_ad(type == MTR_MEMO_MODIFY ut_ad(type == MTR_MEMO_MODIFY);
|| type == MTR_MEMO_FREE_CLUST_LEAF);
ut_ad(mtr_memo_contains(mtr, object, ut_ad(mtr_memo_contains(mtr, object,
MTR_MEMO_PAGE_X_FIX)); MTR_MEMO_PAGE_X_FIX));
#endif /* UNIV_DEBUG */
} else { } else {
rw_lock_x_unlock((rw_lock_t*)object); rw_lock_x_unlock((rw_lock_t*)object);
} }
......
...@@ -2097,20 +2097,15 @@ row_ins_index_entry_low( ...@@ -2097,20 +2097,15 @@ row_ins_index_entry_low(
if (big_rec) { if (big_rec) {
ut_a(err == DB_SUCCESS); ut_a(err == DB_SUCCESS);
/* Write out the externally stored /* Write out the externally stored
columns, but allocate the pages and columns while still x-latching
write the pointers using the index->lock and block->lock. We have
mini-transaction of the record update. to mtr_commit(mtr) first, so that the
If any pages were freed in the update, redo log will be written in the
temporarily mark them allocated so correct order. Otherwise, we would run
that off-page columns will not into trouble on crash recovery if mtr
overwrite them. We must do this, freed B-tree pages on which some of
because we will write the redo log for the big_rec fields will be written. */
the BLOB writes before writing the btr_cur_mtr_commit_and_start(&cursor, &mtr);
redo log for the record update. Thus,
redo log application at crash recovery
will see BLOBs being written to free pages. */
btr_mark_freed_leaves(index, &mtr, TRUE);
rec = btr_cur_get_rec(&cursor); rec = btr_cur_get_rec(&cursor);
offsets = rec_get_offsets( offsets = rec_get_offsets(
...@@ -2119,8 +2114,7 @@ row_ins_index_entry_low( ...@@ -2119,8 +2114,7 @@ row_ins_index_entry_low(
err = btr_store_big_rec_extern_fields( err = btr_store_big_rec_extern_fields(
index, btr_cur_get_block(&cursor), index, btr_cur_get_block(&cursor),
rec, offsets, big_rec, &mtr, rec, offsets, &mtr, FALSE, big_rec);
FALSE, &mtr);
/* If writing big_rec fails (for /* If writing big_rec fails (for
example, because of DB_OUT_OF_FILE_SPACE), example, because of DB_OUT_OF_FILE_SPACE),
the record will be corrupted. Even if the record will be corrupted. Even if
...@@ -2133,9 +2127,6 @@ row_ins_index_entry_low( ...@@ -2133,9 +2127,6 @@ row_ins_index_entry_low(
undo log, and thus the record cannot undo log, and thus the record cannot
be rolled back. */ be rolled back. */
ut_a(err == DB_SUCCESS); ut_a(err == DB_SUCCESS);
/* Free the pages again
in order to avoid a leak. */
btr_mark_freed_leaves(index, &mtr, FALSE);
goto stored_big_rec; goto stored_big_rec;
} }
} else { } else {
...@@ -2177,7 +2168,7 @@ row_ins_index_entry_low( ...@@ -2177,7 +2168,7 @@ row_ins_index_entry_low(
err = btr_store_big_rec_extern_fields( err = btr_store_big_rec_extern_fields(
index, btr_cur_get_block(&cursor), index, btr_cur_get_block(&cursor),
rec, offsets, big_rec, &mtr, FALSE, NULL); rec, offsets, &mtr, FALSE, big_rec);
stored_big_rec: stored_big_rec:
if (modify) { if (modify) {
......
...@@ -243,20 +243,19 @@ row_build( ...@@ -243,20 +243,19 @@ row_build(
} }
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG #if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
if (rec_offs_any_null_extern(rec, offsets)) { /* This condition can occur during crash recovery before
/* This condition can occur during crash recovery trx_rollback_active() has completed execution.
before trx_rollback_active() has completed execution.
This condition is possible if the server crashed
This condition is possible if the server crashed during an insert or update before
during an insert or update-by-delete-and-insert before btr_store_big_rec_extern_fields() did mtr_commit() all
btr_store_big_rec_extern_fields() did mtr_commit() all BLOB pointers to the clustered index record.
BLOB pointers to the freshly inserted clustered index
record. */ If the record contains a null BLOB pointer, look up the
ut_a(trx_assert_recovered( transaction that holds the implicit lock on this record, and
row_get_rec_trx_id(rec, index, offsets))); assert that it was recovered (and will soon be rolled back). */
ut_a(trx_undo_roll_ptr_is_insert( ut_a(!rec_offs_any_null_extern(rec, offsets)
row_get_rec_roll_ptr(rec, index, offsets))); || trx_assert_recovered(row_get_rec_trx_id(rec, index, offsets)));
}
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ #endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
if (type != ROW_COPY_POINTERS) { if (type != ROW_COPY_POINTERS) {
......
...@@ -1978,22 +1978,21 @@ row_upd_clust_rec( ...@@ -1978,22 +1978,21 @@ row_upd_clust_rec(
rec_offs_init(offsets_); rec_offs_init(offsets_);
ut_a(err == DB_SUCCESS); ut_a(err == DB_SUCCESS);
/* Write out the externally stored columns, but /* Write out the externally stored columns while still
allocate the pages and write the pointers using the x-latching index->lock and block->lock. We have to
mini-transaction of the record update. If any pages mtr_commit(mtr) first, so that the redo log will be
were freed in the update, temporarily mark them written in the correct order. Otherwise, we would run
allocated so that off-page columns will not overwrite into trouble on crash recovery if mtr freed B-tree
them. We must do this, because we write the redo log pages on which some of the big_rec fields will be
for the BLOB writes before writing the redo log for written. */
the record update. */ btr_cur_mtr_commit_and_start(btr_cur, mtr);
btr_mark_freed_leaves(index, mtr, TRUE);
rec = btr_cur_get_rec(btr_cur); rec = btr_cur_get_rec(btr_cur);
err = btr_store_big_rec_extern_fields( err = btr_store_big_rec_extern_fields(
index, btr_cur_get_block(btr_cur), rec, index, btr_cur_get_block(btr_cur), rec,
rec_get_offsets(rec, index, offsets_, rec_get_offsets(rec, index, offsets_,
ULINT_UNDEFINED, &heap), ULINT_UNDEFINED, &heap),
big_rec, mtr, TRUE, mtr); mtr, TRUE, big_rec);
/* If writing big_rec fails (for example, because of /* If writing big_rec fails (for example, because of
DB_OUT_OF_FILE_SPACE), the record will be corrupted. DB_OUT_OF_FILE_SPACE), the record will be corrupted.
Even if we did not update any externally stored Even if we did not update any externally stored
...@@ -2003,8 +2002,6 @@ row_upd_clust_rec( ...@@ -2003,8 +2002,6 @@ row_upd_clust_rec(
to the undo log, and thus the record cannot be rolled to the undo log, and thus the record cannot be rolled
back. */ back. */
ut_a(err == DB_SUCCESS); ut_a(err == DB_SUCCESS);
/* Free the pages again in order to avoid a leak. */
btr_mark_freed_leaves(index, mtr, FALSE);
} }
mtr_commit(mtr); mtr_commit(mtr);
......
...@@ -912,7 +912,7 @@ trx_undo_add_page( ...@@ -912,7 +912,7 @@ trx_undo_add_page(
page_no = fseg_alloc_free_page_general(header_page + TRX_UNDO_SEG_HDR page_no = fseg_alloc_free_page_general(header_page + TRX_UNDO_SEG_HDR
+ TRX_UNDO_FSEG_HEADER, + TRX_UNDO_FSEG_HEADER,
undo->top_page_no + 1, FSP_UP, undo->top_page_no + 1, FSP_UP,
TRUE, mtr, mtr); TRUE, mtr);
fil_space_release_free_extents(undo->space, n_reserved); fil_space_release_free_extents(undo->space, n_reserved);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment