Commit 382cb9fd authored by marko

branches/zip: Document and obey the rules for modifying the free bits in
the insert buffer bitmap.

ibuf_set_free_bits_func(): Never disable redo logging.

ibuf_update_free_bits_zip(): Remove.

btr_page_reorganize_low(), page_zip_reorganize(): Do not update the insert
buffer bitmap.  Instead, document that callers will have to take care of it,
and adapt the callers.

btr_compress(): On error, reset the insert buffer free bits.

btr_cur_insert_if_possible(): Do not modify the insert buffer bitmap.

btr_compress(), btr_cur_optimistic_insert(): On compressed pages,
reset the insert buffer bitmap.  Document why.

btr_cur_update_alloc_zip(): Document why it is necessary and sufficient
to reset the insert buffer free bits.

btr_cur_update_in_place(), btr_cur_optimistic_update(),
btr_cur_pessimistic_update(): Update the free bits in the same
mini-transaction.  Document that the mini-transaction must be
committed before latching any further pages.  Verify that this
is the case in all execution paths.

row_ins_sec_index_entry_by_modify(), row_ins_clust_index_entry_by_modify(),
row_undo_mod_clust_low(): Because these functions call
btr_cur_update_in_place(), btr_cur_optimistic_update(), or
btr_cur_pessimistic_update(), document that the mini-transaction must be
committed before latching any further pages.  Verify that this is the case
in all execution paths.
parent d5c198ea
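
The changes above all follow from a single invariant: the free bits recorded in the insert buffer bitmap must never exceed the free space actually available on the B-tree page, at any point that crash recovery could observe. The following minimal standalone sketch (not InnoDB code; all names and figures are invented) models why decrementing or resetting the bits in a separately committed mini-transaction is safe, while incrementing them that way is not:

    #include <assert.h>
    #include <stdio.h>

    /* One committed mini-transaction, reduced to the deltas it
       applies to the page's free space and the bitmap's free bits. */
    struct mtr_rec { int page_delta; int bits_delta; };

    /* Replay the first n committed mini-transactions, as crash
       recovery replays a prefix of the redo log; the invariant must
       hold after every prefix. */
    static void replay(const struct mtr_rec *log, int n)
    {
        int free_space = 50;   /* bytes actually free on the page */
        int free_bits  = 50;   /* free space claimed by the bitmap */

        for (int i = 0; i < n; i++) {
            free_space += log[i].page_delta;
            free_bits  += log[i].bits_delta;
            assert(free_bits <= free_space);
        }
        printf("replayed %d mtr(s): bits=%d, space=%d\n",
               n, free_bits, free_space);
    }

    int main(void)
    {
        /* SAFE: an insert is about to consume 10 bytes. Decrement
           the bits in a mini-transaction committed BEFORE the one
           that consumes the space; every prefix keeps bits <= space. */
        const struct mtr_rec safe[] = { { 0, -10 }, { -10, 0 } };
        replay(safe, 1);   /* crash between the commits: still safe */
        replay(safe, 2);

        /* UNSAFE: a delete frees 10 bytes. If the bits were
           incremented in a separately committed mini-transaction, a
           crash between the two commits would leave them momentarily
           too high. */
        const struct mtr_rec unsafe[] = { { 0, 10 }, { 10, 0 } };
        replay(unsafe, 1); /* assertion fires: bits=60 > space=50 */

        return 0;
    }

Because replaying a redo-log prefix must never leave the bits too high, the patch only ever raises the bits inside the page's own mini-transaction and otherwise falls back to the pessimistic reset.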
@@ -975,13 +975,6 @@ btr_page_reorganize_low(
success = TRUE;
}
/* On compressed pages, recompute the insert buffer free bits. */
if (UNIV_LIKELY_NULL(page_zip)
&& !dict_index_is_clust(index) && page_is_leaf(page)) {
ibuf_update_free_bits_zip(page_zip_get_size(page_zip), block);
}
func_exit:
#ifdef UNIV_ZIP_DEBUG
ut_a(!page_zip || page_zip_validate(page_zip, page));
@@ -995,7 +988,11 @@ btr_page_reorganize_low(
}
/*****************************************************************
Reorganizes an index page. */
Reorganizes an index page.
IMPORTANT: if btr_page_reorganize() is invoked on a compressed leaf
page of a non-clustered index, the caller must update the insert
buffer free bits in the same mini-transaction in such a way that the
modification will be redo-logged. */
ibool
btr_page_reorganize(
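
To make this contract concrete, the caller pattern it implies might look like the following fragment (illustration only, not part of the patch; it assumes that block and index are in scope and that btr_page_reorganize() takes the block, the index, and the mini-transaction):

    mtr_t   mtr;

    mtr_start(&mtr);

    if (btr_page_reorganize(block, index, &mtr)  /* assumed signature */
        && buf_block_get_page_zip(block)
        && !dict_index_is_clust(index)
        && page_is_leaf(buf_block_get_frame(block))) {

        /* Same mini-transaction, with redo logging left on, so that
           recovery replays the reorganization and the bitmap update
           together or not at all. */
        ibuf_update_free_bits_low(buf_block_get_zip_size(block),
                                  block, UNIV_PAGE_SIZE, &mtr);
    }

    /* The bitmap page ranks below B-tree pages in the latching
       order, so commit before latching any further pages. */
    mtr_commit(&mtr);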
@@ -2400,6 +2397,13 @@ btr_compress(
/* No space for merge */
err_exit:
/* We play it safe and reset the free bits. */
if (zip_size
&& page_is_leaf(merge_page)
&& !dict_index_is_clust(index)) {
ibuf_reset_free_bits(merge_block);
}
mem_heap_free(heap);
return(FALSE);
}
@@ -2523,11 +2527,40 @@ btr_compress(
mem_heap_free(heap);
if (!dict_index_is_clust(index) && page_is_leaf(merge_page)) {
/* We have added new records to merge_page:
update its free bits */
/* Update the free bits of the B-tree page in the
insert buffer bitmap. This has to be done in a
separate mini-transaction that is committed before the
main mini-transaction. We cannot update the insert
buffer bitmap in this mini-transaction, because
btr_compress() can be invoked recursively without
committing the mini-transaction in between. Since
insert buffer bitmap pages have a lower rank than
B-tree pages, we must not access other pages in the
same mini-transaction after accessing an insert buffer
bitmap page. */
/* The free bits in the insert buffer bitmap must
never exceed the free space on a page. It is safe to
decrement or reset the bits in the bitmap in a
mini-transaction that is committed before the
mini-transaction that affects the free space. */
/* It is unsafe to increment the bits in a separately
committed mini-transaction, because in crash recovery,
the free bits could momentarily be set too high. */
if (zip_size) {
ibuf_update_free_bits_zip(zip_size, merge_block);
/* Because the free bits may be incremented
and we cannot update the insert buffer bitmap
in the same mini-transaction, the only safe
thing we can do here is the pessimistic
approach: reset the free bits. */
ibuf_reset_free_bits(merge_block);
} else {
/* On uncompressed pages, the free bits will
never increase here. Thus, it is safe to
write the bits accurately in a separate
mini-transaction. */
ibuf_update_free_bits_if_full(merge_block,
UNIV_PAGE_SIZE,
ULINT_UNDEFINED);
......
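
The latching-order argument in the comment above can be modeled in isolation. A minimal sketch (hypothetical ranks and names; InnoDB enforces the real rule through its sync-order checks) of why no other page may be latched after an insert buffer bitmap page in the same mini-transaction:

    #include <assert.h>

    /* Hypothetical latch ranks: a lower number must only be latched
       later. Insert buffer bitmap pages rank below B-tree pages. */
    enum { RANK_IBUF_BITMAP = 1, RANK_BTREE_PAGE = 2 };

    struct mtr { int lowest_rank; };  /* lowest rank latched so far */

    static void mtr_begin(struct mtr *m) { m->lowest_rank = 1000; }

    static void mtr_latch_page(struct mtr *m, int rank)
    {
        /* Latches must be taken in non-increasing rank order within
           a mini-transaction; threads latching in opposite orders
           could otherwise deadlock, because latches are held until
           the mini-transaction commits. */
        assert(rank <= m->lowest_rank);
        m->lowest_rank = rank;
    }

    int main(void)
    {
        struct mtr m;

        /* Fine: B-tree pages first, the bitmap page last. */
        mtr_begin(&m);
        mtr_latch_page(&m, RANK_BTREE_PAGE);
        mtr_latch_page(&m, RANK_BTREE_PAGE);  /* equal rank: allowed
                                                 in this model */
        mtr_latch_page(&m, RANK_IBUF_BITMAP);

        /* Not fine: btr_compress() can recurse and latch further
           B-tree pages, so a bitmap page latched in the same
           mini-transaction would be followed by a higher-rank latch.
           Hence the bitmap update goes into a separate
           mini-transaction that is committed first. */
        mtr_begin(&m);
        mtr_latch_page(&m, RANK_IBUF_BITMAP);
        mtr_latch_page(&m, RANK_BTREE_PAGE);  /* assertion fires */

        return 0;
    }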
@@ -894,14 +894,6 @@ btr_cur_insert_if_possible(
}
}
if (buf_block_get_page_zip(block)
&& !dict_index_is_clust(cursor->index)
&& page_is_leaf(buf_block_get_frame(block))) {
/* Update the free bits in the insert buffer. */
ibuf_update_free_bits_zip(buf_block_get_zip_size(block),
block);
}
return(rec);
}
@@ -1223,9 +1215,34 @@ btr_cur_optimistic_insert(
rec_size + PAGE_DIR_SLOT_SIZE, index->type);
#endif
if (!dict_index_is_clust(index) && UNIV_LIKELY(0 == level)) {
/* We have added a record to page: update its free bits */
/* Update the free bits of the B-tree page in the
insert buffer bitmap. This has to be done in a
separate mini-transaction that is committed before the
main mini-transaction. Since insert buffer bitmap
pages have a lower rank than B-tree pages, we must not
access other pages in the same mini-transaction after
accessing an insert buffer bitmap page. Because the
mini-transaction where this function is invoked may
access other pages, we must update the insert
buffer bitmap in a separate mini-transaction. */
/* The free bits in the insert buffer bitmap must
never exceed the free space on a page. It is safe to
decrement or reset the bits in the bitmap in a
mini-transaction that is committed before the
mini-transaction that affects the free space. */
/* It is unsafe to increment the bits in a separately
committed mini-transaction, because in crash recovery,
the free bits could momentarily be set too high. */
if (zip_size) {
ibuf_update_free_bits_zip(zip_size, block);
/* Because the free bits may be incremented
and we cannot update the insert buffer bitmap
in the same mini-transaction, the only safe
thing we can do here is the pessimistic
approach: reset the free bits. */
ibuf_reset_free_bits(block);
} else {
ibuf_update_free_bits_if_full(
block, max_size,
@@ -1658,10 +1675,21 @@ btr_cur_update_alloc_zip(
return(FALSE);
}
/* After recompressing a page, we must make sure that the free
bits in the insert buffer bitmap will not exceed the free
space on the page. Because this function will not attempt
recompression unless page_zip_available() fails above, it is
safe to reset the free bits if page_zip_available() fails
again, below. The free bits can safely be reset in a separate
mini-transaction. If page_zip_available() succeeds below, we
can be sure that the page_zip_compress() above did not reduce
the free space available on the page. */
if (!page_zip_available(page_zip, dict_index_is_clust(index),
length, 0)) {
if (!dict_index_is_clust(index)) {
/* No space on the page: reset the free bits. */
/* Out of space: reset the free bits. */
if (!dict_index_is_clust(index)
&& page_is_leaf(buf_block_get_frame(block))) {
ibuf_reset_free_bits(block);
}
return(FALSE);
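
The sufficiency argument above can be checked with invented figures. A standalone sketch (hypothetical names and numbers, not the real page_zip_available() logic):

    #include <assert.h>
    #include <stdio.h>

    /* Hypothetical availability check: does the compressed page have
       room for an update needing 'need' bytes? */
    static int zip_available(int avail, int need)
    { return need <= avail; }

    int main(void)
    {
        int avail = 200;      /* invented: space before recompression */
        int need  = 300;      /* invented: space the update needs */
        int free_bits = 200;  /* what the ibuf bitmap currently claims */

        /* Recompression is only attempted because this check failed. */
        assert(!zip_available(avail, need));

        avail = 250;          /* recompression reshuffled the space */

        if (!zip_available(avail, need)) {
            /* Still no room. Recompression may have moved the free
               space in either direction, so the bitmap may now
               overstate it. A reset can only lower the bits, so it
               is safe in a separately committed mini-transaction,
               and it restores free_bits <= avail. */
            free_bits = 0;
            assert(free_bits <= avail);
            printf("out of space: free bits reset\n");
            return 1;
        }
        /* If the second check succeeds, recompression did not reduce
           the space below what the caller needs: no reset required. */
        return 0;
    }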
@@ -1686,7 +1714,8 @@ btr_cur_update_in_place(
ulint cmpl_info,/* in: compiler info on secondary index
updates */
que_thr_t* thr, /* in: query thread */
mtr_t* mtr) /* in: mtr */
mtr_t* mtr) /* in: mtr; must be committed before
latching any further pages */
{
dict_index_t* index;
buf_block_t* block;
@@ -1728,13 +1757,6 @@ btr_cur_update_in_place(
thr, &roll_ptr);
if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
if (page_zip && !dict_index_is_clust(index)
&& page_is_leaf(buf_block_get_frame(block))) {
/* Update the free bits in the insert buffer. */
ibuf_update_free_bits_zip(
page_zip_get_size(page_zip), block);
}
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
@@ -1773,8 +1795,8 @@ btr_cur_update_in_place(
if (page_zip && !dict_index_is_clust(index)
&& page_is_leaf(buf_block_get_frame(block))) {
/* Update the free bits in the insert buffer. */
ibuf_update_free_bits_zip(buf_block_get_zip_size(block),
block);
ibuf_update_free_bits_low(buf_block_get_zip_size(block),
block, UNIV_PAGE_SIZE, mtr);
}
btr_cur_update_in_place_log(flags, rec, index, update,
@@ -1820,7 +1842,8 @@ btr_cur_optimistic_update(
ulint cmpl_info,/* in: compiler info on secondary index
updates */
que_thr_t* thr, /* in: query thread */
mtr_t* mtr) /* in: mtr */
mtr_t* mtr) /* in: mtr; must be committed before
latching any further pages */
{
dict_index_t* index;
page_cur_t* page_cursor;
@@ -1870,6 +1893,7 @@ btr_cur_optimistic_update(
}
if (rec_offs_any_extern(offsets)) {
any_extern:
/* Externally stored fields are treated in pessimistic
update */
@@ -1880,11 +1904,7 @@ btr_cur_optimistic_update(
for (i = 0; i < upd_get_n_fields(update); i++) {
if (upd_get_nth_field(update, i)->extern_storage) {
/* Externally stored fields are treated in pessimistic
update */
mem_heap_free(heap);
return(DB_OVERFLOW);
goto any_extern;
}
}
@@ -1905,9 +1925,8 @@ btr_cur_optimistic_update(
if (UNIV_LIKELY_NULL(page_zip)
&& !btr_cur_update_alloc_zip(page_zip, block, index,
new_rec_size, mtr)) {
mem_heap_free(heap);
return(DB_ZIP_OVERFLOW);
err = DB_ZIP_OVERFLOW;
goto err_exit;
}
if (UNIV_UNLIKELY(new_rec_size
@@ -1948,21 +1967,13 @@ btr_cur_optimistic_update(
&roll_ptr);
if (err != DB_SUCCESS) {
err_exit:
if (page_zip && !dict_index_is_clust(index)
&& page_is_leaf(page)) {
/* Update the free bits in the insert buffer. */
ibuf_update_free_bits_zip(
buf_block_get_zip_size(block), block);
}
mem_heap_free(heap);
return(err);
}
/* Ok, we may do the replacement. Store on the page infimum the
explicit locks on rec, before deleting rec (see the comment in
.._pessimistic_update). */
btr_cur_pessimistic_update). */
lock_rec_store_on_page_infimum(block, rec);
@@ -1985,6 +1996,13 @@ btr_cur_optimistic_update(
rec = btr_cur_insert_if_possible(cursor, new_entry, NULL, 0, mtr);
ut_a(rec); /* <- We calculated above the insert would fit */
if (page_zip && !dict_index_is_clust(index)
&& page_is_leaf(page)) {
/* Update the free bits in the insert buffer. */
ibuf_update_free_bits_low(buf_block_get_zip_size(block), block,
UNIV_PAGE_SIZE, mtr);
}
if (!rec_get_deleted_flag(rec, page_is_comp(page))) {
/* The new inserted record owns its possible externally
stored fields */
@@ -2077,7 +2095,8 @@ btr_cur_pessimistic_update(
ulint cmpl_info,/* in: compiler info on secondary index
updates */
que_thr_t* thr, /* in: query thread */
mtr_t* mtr) /* in: mtr */
mtr_t* mtr) /* in: mtr; must be committed before
latching any further pages */
{
big_rec_t* big_rec_vec = NULL;
big_rec_t* dummy_big_rec;
@@ -2237,7 +2256,6 @@ btr_cur_pessimistic_update(
rec = btr_cur_insert_if_possible(cursor, new_entry,
ext_vect, n_ext_vect, mtr);
ut_a(rec || optim_err != DB_UNDERFLOW);
if (rec) {
lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor),
@@ -2255,8 +2273,24 @@ btr_cur_pessimistic_update(
btr_cur_compress_if_useful(cursor, mtr);
if (page_zip && !dict_index_is_clust(index)
&& page_is_leaf(page)) {
/* Update the free bits in the insert buffer. */
ibuf_update_free_bits_low(
buf_block_get_zip_size(block), block,
UNIV_PAGE_SIZE, mtr);
}
err = DB_SUCCESS;
goto return_after_reservations;
} else {
ut_a(optim_err != DB_UNDERFLOW);
/* Out of space: reset the free bits. */
if (!dict_index_is_clust(index)
&& page_is_leaf(page)) {
ibuf_reset_free_bits(block);
}
}
/* Was the record to be updated positioned as the first user
@@ -2783,7 +2817,8 @@ btr_cur_optimistic_delete(
ut_a(!page_zip || page_zip_validate(page_zip, page));
#endif /* UNIV_ZIP_DEBUG */
if (!dict_index_is_clust(cursor->index)) {
if (!dict_index_is_clust(cursor->index)
&& page_is_leaf(page)) {
ibuf_update_free_bits_low(zip_size, block,
max_ins_size, mtr);
}
......
@@ -857,15 +857,6 @@ ibuf_set_free_bits_func(
mtr_start(&mtr);
if (recv_recovery_is_on()) {
/* Do not write to the redo log, because there is
crash recovery in progress. Flushing the log would
require the possession of log_sys->mutex, which is
being held by the main thread. */
mtr_set_log_mode(&mtr, MTR_LOG_NONE);
}
space = buf_block_get_space(block);
page_no = buf_block_get_page_no(block);
zip_size = buf_block_get_zip_size(block);
@@ -3307,7 +3298,7 @@ ibuf_merge_or_delete_for_page(
if (UNIV_UNLIKELY(corruption_noticed)) {
fputs("InnoDB: Discarding record\n ", stderr);
rec_print_old(stderr, ibuf_rec);
fputs("\n from the insert buffer!\n\n", stderr);
fputs("\nInnoDB: from the insert buffer!\n\n", stderr);
} else if (block) {
/* Now we have at pcur a record which should be
inserted to the index page; NOTE that the call below
......
@@ -230,7 +230,11 @@ btr_root_raise_and_insert(
ulint n_ext, /* in: number of elements in vec */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Reorganizes an index page. */
Reorganizes an index page.
IMPORTANT: if btr_page_reorganize() is invoked on a compressed leaf
page of a non-clustered index, the caller must update the insert
buffer free bits in the same mini-transaction in such a way that the
modification will be redo-logged. */
ibool
btr_page_reorganize(
......
@@ -224,7 +224,8 @@ btr_cur_update_in_place(
ulint cmpl_info,/* in: compiler info on secondary index
updates */
que_thr_t* thr, /* in: query thread */
mtr_t* mtr); /* in: mtr */
mtr_t* mtr); /* in: mtr; must be committed before
latching any further pages */
/*****************************************************************
Tries to update a record on a page in an index tree. It is assumed that mtr
holds an x-latch on the page. The operation does not succeed if there is too
@@ -248,7 +249,8 @@ btr_cur_optimistic_update(
ulint cmpl_info,/* in: compiler info on secondary index
updates */
que_thr_t* thr, /* in: query thread */
mtr_t* mtr); /* in: mtr */
mtr_t* mtr); /* in: mtr; must be committed before
latching any further pages */
/*****************************************************************
Performs an update of a record on a page of a tree. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. If the
@@ -271,7 +273,8 @@ btr_cur_pessimistic_update(
ulint cmpl_info,/* in: compiler info on secondary index
updates */
que_thr_t* thr, /* in: query thread */
mtr_t* mtr); /* in: mtr */
mtr_t* mtr); /* in: mtr; must be committed before
latching any further pages */
/***************************************************************
Marks a clustered index record deleted. Writes an undo log record to
undo log on this delete marking. Writes in the trx id field the id
......
@@ -86,18 +86,6 @@ ibuf_update_free_bits_if_full(
ulint increase);/* in: upper limit for the additional space
used in the latest operation, if known, or
ULINT_UNDEFINED */
/****************************************************************************
Updates the free bits of a compressed page in the ibuf bitmap. This
is done in a separate mini-transaction, hence this operation does not
restrict further work to only ibuf bitmap operations, which would
result if the latch to the bitmap page were kept. */
UNIV_INLINE
void
ibuf_update_free_bits_zip(
/*======================*/
ulint zip_size,/* in: compressed page size in bytes */
buf_block_t* block); /* in: B-tree leaf page of a secondary
index */
/**************************************************************************
Updates the free bits for the page to reflect the present state. Does this
in the mtr given, which means that the latching order rules virtually
......
@@ -279,30 +279,3 @@ ibuf_update_free_bits_if_full(
ibuf_set_free_bits(block, after, before);
}
}
/****************************************************************************
Updates the free bits of a compressed page in the ibuf bitmap. This
is done in a separate mini-transaction, hence this operation does not
restrict further work to only ibuf bitmap operations, which would
result if the latch to the bitmap page were kept. */
UNIV_INLINE
void
ibuf_update_free_bits_zip(
/*======================*/
ulint zip_size,/* in: compressed page size in bytes */
buf_block_t* block) /* in: B-tree leaf page of a secondary
index */
{
ulint free_bits = ibuf_index_page_calc_free(zip_size, block);
if (free_bits == 0) {
/* We move the page to the front of the buffer pool LRU list:
the purpose of this is to prevent those pages to which we
cannot make inserts using the insert buffer from slipping
out of the buffer pool */
buf_page_make_young(&block->page);
}
ibuf_set_free_bits(block, free_bits, ULINT_UNDEFINED);
}
@@ -348,7 +348,11 @@ page_zip_write_header(
Reorganize and compress a page. This is a low-level operation for
compressed pages, to be used when page_zip_compress() fails.
On success, a redo log entry MLOG_ZIP_PAGE_COMPRESS will be written.
The function btr_page_reorganize() should be preferred whenever possible. */
The function btr_page_reorganize() should be preferred whenever possible.
IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a
non-clustered index, the caller must update the insert buffer free
bits in the same mini-transaction in such a way that the modification
will be redo-logged. */
ibool
page_zip_reorganize(
......
@@ -19,7 +19,6 @@ Created June 2005 by Marko Makela
#include "dict0dict.h"
#include "btr0sea.h"
#include "btr0cur.h"
#include "ibuf0ibuf.h"
#include "page0types.h"
#include "lock0lock.h"
#include "log0recv.h"
@@ -3821,7 +3820,11 @@ page_zip_write_header_log(
Reorganize and compress a page. This is a low-level operation for
compressed pages, to be used when page_zip_compress() fails.
On success, a redo log entry MLOG_ZIP_PAGE_COMPRESS will be written.
The function btr_page_reorganize() should be preferred whenever possible. */
The function btr_page_reorganize() should be preferred whenever possible.
IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a
non-clustered index, the caller must update the insert buffer free
bits in the same mini-transaction in such a way that the modification
will be redo-logged. */
ibool
page_zip_reorganize(
@@ -3885,11 +3888,6 @@ page_zip_reorganize(
lock_move_reorganize_page(block, temp_block);
btr_search_drop_page_hash_index(block);
if (!dict_index_is_clust(index) && page_is_leaf(page)) {
/* Recompute the insert buffer free bits. */
ibuf_update_free_bits_zip(page_zip_get_size(page_zip), block);
}
buf_block_free(temp_block);
return(TRUE);
}
......
@@ -215,7 +215,8 @@ row_ins_sec_index_entry_by_modify(
btr_cur_t* cursor, /* in: B-tree cursor */
const dtuple_t* entry, /* in: index entry to insert */
que_thr_t* thr, /* in: query thread */
mtr_t* mtr) /* in: mtr */
mtr_t* mtr) /* in: mtr; must be committed before
latching any further pages */
{
big_rec_t* dummy_big_rec;
mem_heap_t* heap;
@@ -292,7 +293,8 @@ row_ins_clust_index_entry_by_modify(
externally stored fields in entry, or NULL */
ulint n_ext_vec,/* in: number of fields in ext_vec */
que_thr_t* thr, /* in: query thread */
mtr_t* mtr) /* in: mtr */
mtr_t* mtr) /* in: mtr; must be committed before
latching any further pages */
{
rec_t* rec;
upd_t* update;
......
@@ -82,7 +82,8 @@ row_undo_mod_clust_low(
we may run out of file space */
undo_node_t* node, /* in: row undo node */
que_thr_t* thr, /* in: query thread */
mtr_t* mtr, /* in: mtr */
mtr_t* mtr, /* in: mtr; must be committed before
latching any further pages */
ulint mode) /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
{
btr_pcur_t* pcur;
......