Commit a8f2f7af authored by Marko Mäkelä

Bug #56680 wrong InnoDB results from a case-insensitive covering index

row_search_for_mysql(): When a secondary index record might not be
visible in the current transaction's read view and we consult the
clustered index and optionally some undo log records, return the
relevant columns of the clustered index record to MySQL instead of the
secondary index record.

REC_INFO_DELETED_FLAG: Move the definition from rem0rec.ic to rem0rec.h.

ibuf_insert_to_index_page_low(): New function, refactored from
ibuf_insert_to_index_page().

ibuf_insert_to_index_page(): When we are inserting a record in place
of a delete-marked record and some fields of the record differ, update
that record just like row_ins_sec_index_entry_by_modify() would do.

mysql_row_templ_t: Add clust_rec_field_no.

row_sel_store_mysql_rec(), row_sel_push_cache_row_for_mysql(): Add the
flag rec_clust, for returning data at clust_rec_field_no instead of
rec_field_no. Resurrect the debug assertion that the record not be
marked for deletion. (Bug #55626)

buf_LRU_free_block(): Refactored from
buf_LRU_search_and_free_block(). This is needed for the
innodb_change_buffering_debug diagnostics.

[UNIV_DEBUG || UNIV_IBUF_DEBUG] ibuf_debug, buf_page_get_gen(),
buf_flush_page_try():
Implement innodb_change_buffering_debug=1 for evicting pages from the
buffer pool, so that change buffering will be attempted more
frequently.
parent 78804bc8
SET GLOBAL tx_isolation='REPEATABLE-READ';
CREATE TABLE bug56680(
a INT AUTO_INCREMENT PRIMARY KEY,
b CHAR(1),
c INT,
INDEX(b))
ENGINE=InnoDB;
INSERT INTO bug56680 VALUES(0,'x',1);
BEGIN;
SELECT b FROM bug56680;
b
x
BEGIN;
UPDATE bug56680 SET b='X';
SELECT b FROM bug56680;
b
x
SELECT * FROM bug56680;
a b c
1 x 1
ROLLBACK;
SELECT b FROM bug56680;
b
x
SET GLOBAL tx_isolation='READ-UNCOMMITTED';
INSERT INTO bug56680 SELECT 0,b,c FROM bug56680;
INSERT INTO bug56680 SELECT 0,b,c FROM bug56680;
INSERT INTO bug56680 SELECT 0,b,c FROM bug56680;
INSERT INTO bug56680 SELECT 0,b,c FROM bug56680;
INSERT INTO bug56680 SELECT 0,b,c FROM bug56680;
INSERT INTO bug56680 SELECT 0,b,c FROM bug56680;
INSERT INTO bug56680 SELECT 0,b,c FROM bug56680;
INSERT INTO bug56680 SELECT 0,b,c FROM bug56680;
INSERT INTO bug56680 SELECT 0,b,c FROM bug56680;
INSERT INTO bug56680 SELECT 0,b,c FROM bug56680;
INSERT INTO bug56680 SELECT 0,b,c FROM bug56680;
BEGIN;
SELECT b FROM bug56680 LIMIT 2;
b
x
x
BEGIN;
DELETE FROM bug56680 WHERE a=1;
INSERT INTO bug56680 VALUES(1,'X',1);
SELECT b FROM bug56680 LIMIT 3;
b
X
x
x
SELECT b FROM bug56680 LIMIT 2;
b
x
x
CHECK TABLE bug56680;
Table Op Msg_type Msg_text
test.bug56680 check status OK
ROLLBACK;
SELECT b FROM bug56680 LIMIT 2;
b
x
x
CHECK TABLE bug56680;
Table Op Msg_type Msg_text
test.bug56680 check status OK
SELECT b FROM bug56680 LIMIT 2;
b
x
x
CREATE TABLE bug56680_2(
a INT AUTO_INCREMENT PRIMARY KEY,
b VARCHAR(2) CHARSET latin1 COLLATE latin1_german2_ci,
c INT,
INDEX(b))
ENGINE=InnoDB;
INSERT INTO bug56680_2 SELECT 0,_latin1 0xdf,c FROM bug56680;
BEGIN;
SELECT HEX(b) FROM bug56680_2 LIMIT 2;
HEX(b)
DF
DF
DELETE FROM bug56680_2 WHERE a=1;
INSERT INTO bug56680_2 VALUES(1,'SS',1);
SELECT HEX(b) FROM bug56680_2 LIMIT 3;
HEX(b)
5353
DF
DF
CHECK TABLE bug56680_2;
Table Op Msg_type Msg_text
test.bug56680_2 check status OK
DELETE FROM bug56680_2 WHERE a=1;
INSERT INTO bug56680_2 VALUES(1,_latin1 0xdf,1);
SELECT HEX(b) FROM bug56680_2 LIMIT 3;
HEX(b)
DF
DF
DF
CHECK TABLE bug56680_2;
Table Op Msg_type Msg_text
test.bug56680_2 check status OK
DROP TABLE bug56680_2;
DROP TABLE bug56680;
#
# Bug #56680 InnoDB may return wrong results from a case-insensitive index
#
# Outline of the test:
# 1. A reader transaction must keep seeing the committed value 'x' while
#    another connection has an uncommitted UPDATE to 'X' (equal under the
#    case-insensitive collation, different in bytes).
# 2. With many rows in the table, delete and re-insert a row with a
#    different spelling of b, so that the secondary index change can go
#    through the insert buffer.
# 3. Repeat step 2 with latin1_german2_ci, where the values 0xdf and 'SS'
#    are used as two spellings that differ in storage length.
-- source include/have_innodb.inc
-- disable_query_log
# Remember the global isolation level so that it can be restored at the end.
SET @tx_isolation_orig = @@tx_isolation;
# The flag innodb_change_buffering_debug is only available in debug builds.
# It instructs InnoDB to try to evict pages from the buffer pool when
# change buffering is possible, so that the change buffer will be used
# whenever possible.
# Both statements below tolerate ER_UNKNOWN_SYSTEM_VARIABLE, so that the
# test can also run where the variable does not exist.
-- error 0,ER_UNKNOWN_SYSTEM_VARIABLE
SET @innodb_change_buffering_debug_orig = @@innodb_change_buffering_debug;
-- error 0,ER_UNKNOWN_SYSTEM_VARIABLE
SET GLOBAL innodb_change_buffering_debug = 1;
-- enable_query_log
SET GLOBAL tx_isolation='REPEATABLE-READ';
CREATE TABLE bug56680(
a INT AUTO_INCREMENT PRIMARY KEY,
b CHAR(1),
c INT,
INDEX(b))
ENGINE=InnoDB;
INSERT INTO bug56680 VALUES(0,'x',1);
# Part 1: open a transaction in the default connection and read b
# before the other connection modifies it.
BEGIN;
SELECT b FROM bug56680;
connect (con1,localhost,root,,);
connection con1;
# Change the case of b in a second connection and leave it uncommitted.
BEGIN;
UPDATE bug56680 SET b='X';
connection default;
# This should return the last committed value 'x', but would return 'X'
# due to a bug in row_search_for_mysql().
SELECT b FROM bug56680;
# This would always return the last committed value 'x'.
SELECT * FROM bug56680;
connection con1;
ROLLBACK;
disconnect con1;
connection default;
# After the rollback in con1, the value is expected to be 'x' again.
SELECT b FROM bug56680;
# For the rest of this test, use the READ UNCOMMITTED isolation level
# to see what exists in the secondary index.
# NOTE(review): SET GLOBAL presumably only affects connections opened
# afterwards (the con1 created below), not the already open default
# connection -- confirm.
SET GLOBAL tx_isolation='READ-UNCOMMITTED';
# Create enough rows for the table, so that the insert buffer will be
# used for modifying the secondary index page. There must be multiple
# index pages, because changes to the root page are never buffered.
# Each statement below copies every existing row, doubling the row count:
# 11 statements turn the single row into 2^11 = 2048 rows.
INSERT INTO bug56680 SELECT 0,b,c FROM bug56680;
INSERT INTO bug56680 SELECT 0,b,c FROM bug56680;
INSERT INTO bug56680 SELECT 0,b,c FROM bug56680;
INSERT INTO bug56680 SELECT 0,b,c FROM bug56680;
INSERT INTO bug56680 SELECT 0,b,c FROM bug56680;
INSERT INTO bug56680 SELECT 0,b,c FROM bug56680;
INSERT INTO bug56680 SELECT 0,b,c FROM bug56680;
INSERT INTO bug56680 SELECT 0,b,c FROM bug56680;
INSERT INTO bug56680 SELECT 0,b,c FROM bug56680;
INSERT INTO bug56680 SELECT 0,b,c FROM bug56680;
INSERT INTO bug56680 SELECT 0,b,c FROM bug56680;
# Part 2: open a transaction in the default connection, then replace
# the row a=1 in a second connection with b spelled 'X' instead of 'x'.
BEGIN;
SELECT b FROM bug56680 LIMIT 2;
connect (con1,localhost,root,,);
connection con1;
BEGIN;
DELETE FROM bug56680 WHERE a=1;
# This should be buffered, if innodb_change_buffering_debug = 1 is in effect.
INSERT INTO bug56680 VALUES(1,'X',1);
# This should force an insert buffer merge, and return 'X' in the first row.
SELECT b FROM bug56680 LIMIT 3;
connection default;
# The default connection is expected to keep returning 'x' only, and
# the table must pass CHECK TABLE while the change in con1 is pending.
SELECT b FROM bug56680 LIMIT 2;
CHECK TABLE bug56680;
connection con1;
ROLLBACK;
# After the rollback, both connections are expected to return 'x' only.
SELECT b FROM bug56680 LIMIT 2;
CHECK TABLE bug56680;
connection default;
disconnect con1;
SELECT b FROM bug56680 LIMIT 2;
# Part 3: the same delete and re-insert sequence with latin1_german2_ci,
# using 0xdf (1 byte) and 'SS' (2 bytes) as the two spellings of b.
# Everything below runs in the default connection.
CREATE TABLE bug56680_2(
a INT AUTO_INCREMENT PRIMARY KEY,
b VARCHAR(2) CHARSET latin1 COLLATE latin1_german2_ci,
c INT,
INDEX(b))
ENGINE=InnoDB;
INSERT INTO bug56680_2 SELECT 0,_latin1 0xdf,c FROM bug56680;
BEGIN;
SELECT HEX(b) FROM bug56680_2 LIMIT 2;
DELETE FROM bug56680_2 WHERE a=1;
# This should be buffered, if innodb_change_buffering_debug = 1 is in effect.
INSERT INTO bug56680_2 VALUES(1,'SS',1);
# This should force an insert buffer merge, and return 'SS' in the first row.
SELECT HEX(b) FROM bug56680_2 LIMIT 3;
CHECK TABLE bug56680_2;
# Switch the row back from 'SS' to 0xdf.
DELETE FROM bug56680_2 WHERE a=1;
# This should be buffered, if innodb_change_buffering_debug = 1 is in effect.
INSERT INTO bug56680_2 VALUES(1,_latin1 0xdf,1);
# This should force an insert buffer merge, and return 0xdf in the first row.
SELECT HEX(b) FROM bug56680_2 LIMIT 3;
CHECK TABLE bug56680_2;
DROP TABLE bug56680_2;
DROP TABLE bug56680;
-- disable_query_log
# Restore the settings that were saved at the start of the test.
SET GLOBAL tx_isolation = @tx_isolation_orig;
-- error 0, ER_UNKNOWN_SYSTEM_VARIABLE
SET GLOBAL innodb_change_buffering_debug = @innodb_change_buffering_debug_orig;
...@@ -1270,6 +1270,30 @@ loop: ...@@ -1270,6 +1270,30 @@ loop:
buf_awe_map_page_to_frame(block, TRUE); buf_awe_map_page_to_frame(block, TRUE);
} }
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
if (mode == BUF_GET_IF_IN_POOL && ibuf_debug) {
/* Try to evict the block from the buffer pool, to use the
insert buffer as much as possible. */
if (buf_LRU_free_block(block)) {
mutex_exit(&buf_pool->mutex);
mutex_exit(&block->mutex);
fprintf(stderr,
"innodb_change_buffering_debug evict %u %u\n",
(unsigned) space, (unsigned) offset);
return(NULL);
} else if (buf_flush_page_try(block)) {
fprintf(stderr,
"innodb_change_buffering_debug flush %u %u\n",
(unsigned) space, (unsigned) offset);
guess = block->frame;
goto loop;
}
/* Failed to evict the page; change it directly */
}
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
#ifdef UNIV_SYNC_DEBUG #ifdef UNIV_SYNC_DEBUG
buf_block_buf_fix_inc_debug(block, file, line); buf_block_buf_fix_inc_debug(block, file, line);
#else #else
......
...@@ -723,6 +723,82 @@ buf_flush_try_page( ...@@ -723,6 +723,82 @@ buf_flush_try_page(
return(0); return(0);
} }
# if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
/**********************************************************************
Writes a flushable page asynchronously from the buffer pool to a file.
NOTE: buf_pool->mutex and block->mutex must be held upon entering this
function. If the function returns TRUE, both mutexes have been released
by this function; if it returns FALSE, nothing was flushed and both
mutexes are still held by the caller.
This is loosely based on buf_flush_batch() and buf_flush_try_page(). */
ibool
buf_flush_page_try(
/*===============*/
/* out: TRUE if flushed and
mutexes released; FALSE if not
flushed and mutexes still held */
buf_block_t* block) /* in/out: buffer control block */
{
/* Preconditions: the caller owns both mutexes, and the block
contains a file page. */
ut_ad(mutex_own(&buf_pool->mutex));
ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
ut_ad(mutex_own(&block->mutex));
/* Give up without touching the mutexes if the block cannot be
flushed as part of an LRU flush. */
if (!buf_flush_ready_for_flush(block, BUF_FLUSH_LRU)) {
return(FALSE);
}
if (buf_pool->n_flush[BUF_FLUSH_LRU] > 0
|| buf_pool->init_flush[BUF_FLUSH_LRU]) {
/* There is already a flush batch of the same type running */
return(FALSE);
}
/* Mark an LRU flush as being initiated, and mark the block as
being written by an LRU flush. */
buf_pool->init_flush[BUF_FLUSH_LRU] = TRUE;
block->io_fix = BUF_IO_WRITE;
block->flush_type = BUF_FLUSH_LRU;
/* Count this write as pending; if it is the first pending LRU
write, reset the "no flush" event. */
if (buf_pool->n_flush[BUF_FLUSH_LRU]++ == 0) {
os_event_reset(buf_pool->no_flush[BUF_FLUSH_LRU]);
}
/* VERY IMPORTANT:
Because any thread may call the LRU flush, even when owning
locks on pages, to avoid deadlocks, we must make sure that the
s-lock is acquired on the page without waiting: this is
accomplished because buf_flush_ready_for_flush() must hold,
and that requires the page not to be bufferfixed. */
rw_lock_s_lock_gen(&block->lock, BUF_IO_WRITE);
/* Note that the s-latch is acquired before releasing the
buf_pool mutex: this ensures that the latch is acquired
immediately. */
mutex_exit(&block->mutex);
mutex_exit(&buf_pool->mutex);
/* Even though block is not protected by any mutex at this
point, it is safe to access block, because it is io_fixed and
oldest_modification != 0. Thus, it cannot be relocated in the
buffer pool or removed from flush_list or LRU_list. */
buf_flush_write_block_low(block);
/* Re-acquire buf_pool->mutex only to clear the "flush being
initiated" flag and to signal the event if no LRU writes are
pending any more; the mutex is released again before returning. */
mutex_enter(&buf_pool->mutex);
buf_pool->init_flush[BUF_FLUSH_LRU] = FALSE;
if (buf_pool->n_flush[BUF_FLUSH_LRU] == 0) {
/* The running flush batch has ended */
os_event_set(buf_pool->no_flush[BUF_FLUSH_LRU]);
}
mutex_exit(&buf_pool->mutex);
/* NOTE(review): presumably this pushes out the writes that were
queued by buf_flush_write_block_low() -- confirm. */
buf_flush_buffered_writes();
return(TRUE);
}
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
/*************************************************************** /***************************************************************
Flushes to disk all flushable pages within the flush area. */ Flushes to disk all flushable pages within the flush area. */
static static
......
...@@ -321,35 +321,17 @@ buf_LRU_get_recent_limit(void) ...@@ -321,35 +321,17 @@ buf_LRU_get_recent_limit(void)
} }
/********************************************************************** /**********************************************************************
Look for a replaceable block from the end of the LRU list and put it to Try to put a block from the LRU list to the free list. */
the free list if found. */
ibool ibool
buf_LRU_search_and_free_block( buf_LRU_free_block(
/*==========================*/ /*===============*/
/* out: TRUE if freed */ /* out: TRUE if freed */
ulint n_iterations) /* in: how many times this has been called buf_block_t* block) /* in/out: block to be freed */
repeatedly without result: a high value means
that we should search farther; if value is
k < 10, then we only search k/10 * [number
of pages in the buffer pool] from the end
of the LRU list */
{ {
buf_block_t* block; if (!buf_flush_ready_for_replace(block)) {
ulint distance = 0; return(FALSE);
ibool freed; }
mutex_enter(&(buf_pool->mutex));
freed = FALSE;
block = UT_LIST_GET_LAST(buf_pool->LRU);
while (block != NULL) {
ut_a(block->in_LRU_list);
mutex_enter(&block->mutex);
if (buf_flush_ready_for_replace(block)) {
#ifdef UNIV_DEBUG #ifdef UNIV_DEBUG
if (buf_debug_prints) { if (buf_debug_prints) {
...@@ -388,14 +370,45 @@ buf_LRU_search_and_free_block( ...@@ -388,14 +370,45 @@ buf_LRU_search_and_free_block(
mutex_enter(&block->mutex); mutex_enter(&block->mutex);
buf_LRU_block_free_hashed_page(block); buf_LRU_block_free_hashed_page(block);
freed = TRUE;
return(TRUE);
}
/**********************************************************************
Look for a replaceable block from the end of the LRU list and put it to
the free list if found. */
ibool
buf_LRU_search_and_free_block(
/*==========================*/
/* out: TRUE if freed */
ulint n_iterations) /* in: how many times this has been called
repeatedly without result: a high value means
that we should search farther; if value is
k < 10, then we only search k/10 * [number
of pages in the buffer pool] from the end
of the LRU list */
{
buf_block_t* block;
ulint distance = 0;
ibool freed;
mutex_enter(&(buf_pool->mutex));
freed = FALSE;
block = UT_LIST_GET_LAST(buf_pool->LRU);
while (block != NULL) {
ut_a(block->in_LRU_list);
mutex_enter(&block->mutex);
freed = buf_LRU_free_block(block);
mutex_exit(&block->mutex); mutex_exit(&block->mutex);
if (freed) {
break; break;
} }
mutex_exit(&block->mutex);
block = UT_LIST_GET_PREV(LRU, block); block = UT_LIST_GET_PREV(LRU, block);
distance++; distance++;
......
...@@ -79,6 +79,7 @@ extern "C" { ...@@ -79,6 +79,7 @@ extern "C" {
#include "../storage/innobase/include/dict0crea.h" #include "../storage/innobase/include/dict0crea.h"
#include "../storage/innobase/include/btr0cur.h" #include "../storage/innobase/include/btr0cur.h"
#include "../storage/innobase/include/btr0btr.h" #include "../storage/innobase/include/btr0btr.h"
#include "../storage/innobase/include/ibuf0ibuf.h"
#include "../storage/innobase/include/fsp0fsp.h" #include "../storage/innobase/include/fsp0fsp.h"
#include "../storage/innobase/include/sync0sync.h" #include "../storage/innobase/include/sync0sync.h"
#include "../storage/innobase/include/fil0fil.h" #include "../storage/innobase/include/fil0fil.h"
...@@ -3723,18 +3724,19 @@ include_field: ...@@ -3723,18 +3724,19 @@ include_field:
n_requested_fields++; n_requested_fields++;
templ->col_no = i; templ->col_no = i;
templ->clust_rec_field_no = dict_col_get_clust_pos_noninline(
&index->table->cols[i], clust_index);
ut_ad(templ->clust_rec_field_no != ULINT_UNDEFINED);
if (index == clust_index) { if (index == clust_index) {
templ->rec_field_no = dict_col_get_clust_pos_noninline( templ->rec_field_no = templ->clust_rec_field_no;
&index->table->cols[i], index);
} else { } else {
templ->rec_field_no = dict_index_get_nth_col_pos( templ->rec_field_no = dict_index_get_nth_col_pos(
index, i); index, i);
}
if (templ->rec_field_no == ULINT_UNDEFINED) { if (templ->rec_field_no == ULINT_UNDEFINED) {
prebuilt->need_to_access_clustered = TRUE; prebuilt->need_to_access_clustered = TRUE;
} }
}
if (field->null_ptr) { if (field->null_ptr) {
templ->mysql_null_byte_offset = templ->mysql_null_byte_offset =
...@@ -3785,9 +3787,7 @@ skip_field: ...@@ -3785,9 +3787,7 @@ skip_field:
for (i = 0; i < n_requested_fields; i++) { for (i = 0; i < n_requested_fields; i++) {
templ = prebuilt->mysql_template + i; templ = prebuilt->mysql_template + i;
templ->rec_field_no = dict_col_get_clust_pos_noninline( templ->rec_field_no = templ->clust_rec_field_no;
&index->table->cols[templ->col_no],
clust_index);
} }
} }
} }
...@@ -8990,6 +8990,13 @@ static MYSQL_SYSVAR_LONG(autoinc_lock_mode, innobase_autoinc_lock_mode, ...@@ -8990,6 +8990,13 @@ static MYSQL_SYSVAR_LONG(autoinc_lock_mode, innobase_autoinc_lock_mode,
AUTOINC_OLD_STYLE_LOCKING, /* Minimum value */ AUTOINC_OLD_STYLE_LOCKING, /* Minimum value */
AUTOINC_NO_LOCKING, 0); /* Maximum value */ AUTOINC_NO_LOCKING, 0); /* Maximum value */
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug,
PLUGIN_VAR_RQCMDARG,
"Debug flags for InnoDB change buffering (0=none)",
NULL, NULL, 0, 0, 1, 0);
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
static struct st_mysql_sys_var* innobase_system_variables[]= { static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(additional_mem_pool_size), MYSQL_SYSVAR(additional_mem_pool_size),
MYSQL_SYSVAR(autoextend_increment), MYSQL_SYSVAR(autoextend_increment),
...@@ -9031,6 +9038,9 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { ...@@ -9031,6 +9038,9 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(thread_concurrency), MYSQL_SYSVAR(thread_concurrency),
MYSQL_SYSVAR(thread_sleep_delay), MYSQL_SYSVAR(thread_sleep_delay),
MYSQL_SYSVAR(autoinc_lock_mode), MYSQL_SYSVAR(autoinc_lock_mode),
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
MYSQL_SYSVAR(change_buffering_debug),
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
NULL NULL
}; };
......
...@@ -22,6 +22,7 @@ Created 7/19/1997 Heikki Tuuri ...@@ -22,6 +22,7 @@ Created 7/19/1997 Heikki Tuuri
#include "btr0cur.h" #include "btr0cur.h"
#include "btr0pcur.h" #include "btr0pcur.h"
#include "btr0btr.h" #include "btr0btr.h"
#include "row0upd.h"
#include "sync0sync.h" #include "sync0sync.h"
#include "dict0boot.h" #include "dict0boot.h"
#include "fut0lst.h" #include "fut0lst.h"
...@@ -137,6 +138,11 @@ access order rules. */ ...@@ -137,6 +138,11 @@ access order rules. */
/* Buffer pool size per the maximum insert buffer size */ /* Buffer pool size per the maximum insert buffer size */
#define IBUF_POOL_SIZE_PER_MAX_SIZE 2 #define IBUF_POOL_SIZE_PER_MAX_SIZE 2
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
/* Flag to control insert buffer debugging. */
uint ibuf_debug;
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
/* The insert buffer control structure */ /* The insert buffer control structure */
ibuf_t* ibuf = NULL; ibuf_t* ibuf = NULL;
...@@ -2819,6 +2825,72 @@ ibuf_insert( ...@@ -2819,6 +2825,72 @@ ibuf_insert(
} }
} }
/************************************************************************
Places a secondary index entry taken from the insert buffer on an index
page during a merge. If the first attempt fails, the page is reorganized
and the insert is retried once; if the retry fails as well, a corruption
report is printed to stderr and the entry is not inserted. */
static
void
ibuf_insert_to_index_page_low(
/*==========================*/
	dtuple_t*	entry,	/* in: buffered entry to insert */
	page_t*		page,	/* in: index page where the buffered entry
				should be placed */
	dict_index_t*	index,	/* in: record descriptor */
	mtr_t*		mtr,	/* in: mtr */
	page_cur_t*	page_cur)/* in/out: cursor positioned on the record
				after which to insert the buffered entry;
				repositioned if the page is reorganized */
{
	rec_t*	inserted;
	ulint	space_id;
	ulint	page_number;
	page_t*	map_page;
	ulint	free_bits;

	inserted = page_cur_tuple_insert(page_cur, entry, index, mtr);

	if (UNIV_UNLIKELY(inserted == NULL)) {
		/* The record did not fit: reorganize the page, look up
		the insert position again and retry. This time the
		record must fit. */
		btr_page_reorganize(page, index, mtr);
		page_cur_search(page, index, entry, PAGE_CUR_LE, page_cur);

		inserted = page_cur_tuple_insert(page_cur, entry, index,
						 mtr);
	}

	if (UNIV_LIKELY(inserted != NULL)) {

		return;
	}

	/* Both attempts failed: report the entry and the page. */
	ut_print_timestamp(stderr);

	fprintf(stderr,
		" InnoDB: Error: Insert buffer insert fails;"
		" page free %lu, dtuple size %lu\n",
		(ulong) page_get_max_insert_size(page, 1),
		(ulong) rec_get_converted_size(index, entry));

	fputs("InnoDB: Cannot insert index record ", stderr);
	dtuple_print(stderr, entry);

	fputs("\nInnoDB: The table where this index record belongs\n"
	      "InnoDB: is now probably corrupt. Please run CHECK TABLE on\n"
	      "InnoDB: that table.\n", stderr);

	/* Include the insert buffer bitmap bits of the page in the
	report. */
	space_id = buf_frame_get_space_id(page);
	page_number = buf_frame_get_page_no(page);

	map_page = ibuf_bitmap_get_map_page(space_id, page_number, mtr);
	free_bits = ibuf_bitmap_page_get_bits(map_page, page_number,
					      IBUF_BITMAP_FREE, mtr);

	fprintf(stderr,
		"InnoDB: space %lu, page %lu, bitmap bits %lu\n",
		(ulong) space_id, (ulong) page_number, (ulong) free_bits);

	fputs("InnoDB: Submit a detailed bug report"
	      " to http://bugs.mysql.com\n", stderr);
}
/************************************************************************ /************************************************************************
During merge, inserts to an index page a secondary index entry extracted During merge, inserts to an index page a secondary index entry extracted
from the insert buffer. */ from the insert buffer. */
...@@ -2835,11 +2907,10 @@ ibuf_insert_to_index_page( ...@@ -2835,11 +2907,10 @@ ibuf_insert_to_index_page(
page_cur_t page_cur; page_cur_t page_cur;
ulint low_match; ulint low_match;
rec_t* rec; rec_t* rec;
page_t* bitmap_page;
ulint old_bits;
ut_ad(ibuf_inside()); ut_ad(ibuf_inside());
ut_ad(dtuple_check_typed(entry)); ut_ad(dtuple_check_typed(entry));
ut_ad(!buf_block_align(page)->is_hashed);
if (UNIV_UNLIKELY(dict_table_is_comp(index->table) if (UNIV_UNLIKELY(dict_table_is_comp(index->table)
!= (ibool)!!page_is_comp(page))) { != (ibool)!!page_is_comp(page))) {
...@@ -2877,61 +2948,79 @@ dump: ...@@ -2877,61 +2948,79 @@ dump:
low_match = page_cur_search(page, index, entry, low_match = page_cur_search(page, index, entry,
PAGE_CUR_LE, &page_cur); PAGE_CUR_LE, &page_cur);
if (low_match == dtuple_get_n_fields(entry)) { if (UNIV_UNLIKELY(low_match == dtuple_get_n_fields(entry))) {
mem_heap_t* heap;
upd_t* update;
ulint* offsets;
rec = page_cur_get_rec(&page_cur); rec = page_cur_get_rec(&page_cur);
/* This is based on
row_ins_sec_index_entry_by_modify(BTR_MODIFY_LEAF). */
ut_ad(rec_get_deleted_flag(rec, page_is_comp(page)));
heap = mem_heap_create(1024);
offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED,
&heap);
update = row_upd_build_sec_rec_difference_binary(
index, entry, rec, NULL, heap);
if (update->n_fields == 0) {
/* The records only differ in the delete-mark.
Clear the delete-mark, like we did before
Bug #56680 was fixed. */
btr_cur_del_unmark_for_ibuf(rec, mtr); btr_cur_del_unmark_for_ibuf(rec, mtr);
} else { updated_in_place:
rec = page_cur_tuple_insert(&page_cur, entry, index, mtr); mem_heap_free(heap);
return;
}
if (rec == NULL) { /* Copy the info bits. Clear the delete-mark. */
/* If the record did not fit, reorganize */ update->info_bits = rec_get_info_bits(rec, page_is_comp(page));
update->info_bits &= ~REC_INFO_DELETED_FLAG;
btr_page_reorganize(page, index, mtr); /* We cannot invoke btr_cur_optimistic_update() here,
because we do not have a btr_cur_t or que_thr_t,
as the insert buffer merge occurs at a very low level. */
if (!row_upd_changes_field_size_or_external(index, offsets,
update)) {
/* This is the easy case. Do something similar
to btr_cur_update_in_place(). */
row_upd_rec_in_place(rec, offsets, update);
goto updated_in_place;
}
page_cur_search(page, index, entry, /* A collation may identify values that differ in
PAGE_CUR_LE, &page_cur); storage length.
Some examples (1 or 2 bytes):
utf8_turkish_ci: I = U+0131 LATIN SMALL LETTER DOTLESS I
utf8_general_ci: S = U+00DF LATIN SMALL LETTER SHARP S
utf8_general_ci: A = U+00E4 LATIN SMALL LETTER A WITH DIAERESIS
/* This time the record must fit */ latin1_german2_ci: SS = U+00DF LATIN SMALL LETTER SHARP S
if (UNIV_UNLIKELY(!page_cur_tuple_insert(
&page_cur, entry, index,
mtr))) {
ut_print_timestamp(stderr); Examples of a character (3-byte UTF-8 sequence)
identified with 2 or 4 characters (1-byte UTF-8 sequences):
fprintf(stderr, utf8_unicode_ci: 'II' = U+2171 SMALL ROMAN NUMERAL TWO
" InnoDB: Error: Insert buffer insert" utf8_unicode_ci: '(10)' = U+247D PARENTHESIZED NUMBER TEN
" fails; page free %lu," */
" dtuple size %lu\n",
(ulong) page_get_max_insert_size(
page, 1),
(ulong) rec_get_converted_size(
index, entry));
fputs("InnoDB: Cannot insert index record ",
stderr);
dtuple_print(stderr, entry);
fputs("\nInnoDB: The table where"
" this index record belongs\n"
"InnoDB: is now probably corrupt."
" Please run CHECK TABLE on\n"
"InnoDB: that table.\n", stderr);
bitmap_page = ibuf_bitmap_get_map_page( /* Delete the different-length record, and insert the
buf_frame_get_space_id(page), buffered one. */
buf_frame_get_page_no(page),
mtr);
old_bits = ibuf_bitmap_page_get_bits(
bitmap_page,
buf_frame_get_page_no(page),
IBUF_BITMAP_FREE, mtr);
fprintf(stderr, "InnoDB: Bitmap bits %lu\n", lock_rec_store_on_page_infimum(page, rec);
(ulong) old_bits); page_cur_delete_rec(&page_cur, index, offsets, mtr);
page_cur_move_to_prev(&page_cur);
mem_heap_free(heap);
fputs("InnoDB: Submit a detailed bug report" ibuf_insert_to_index_page_low(entry, page, index, mtr,
" to http://bugs.mysql.com\n", stderr); &page_cur);
} lock_rec_restore_from_page_infimum(rec, page);
} } else {
ibuf_insert_to_index_page_low(entry, page, index, mtr,
&page_cur);
} }
} }
......
...@@ -38,6 +38,20 @@ buf_flush_init_for_writing( ...@@ -38,6 +38,20 @@ buf_flush_init_for_writing(
dulint newest_lsn, /* in: newest modification lsn to the page */ dulint newest_lsn, /* in: newest modification lsn to the page */
ulint space, /* in: space id */ ulint space, /* in: space id */
ulint page_no); /* in: page number */ ulint page_no); /* in: page number */
# if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
/**********************************************************************
Writes a flushable page asynchronously from the buffer pool to a file.
NOTE: buf_pool->mutex and block->mutex must be held upon entering this
function. If the function returns TRUE, both mutexes have been released
by this function; if it returns FALSE, nothing was flushed and both
mutexes are still held by the caller.
This is loosely based on buf_flush_batch() and buf_flush_try_page(). */
ibool
buf_flush_page_try(
/*===============*/
/* out: TRUE if flushed and
mutexes released; FALSE if not
flushed and mutexes still held */
buf_block_t* block); /* in/out: buffer control block */
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
/*********************************************************************** /***********************************************************************
This utility flushes dirty blocks from the end of the LRU list or flush_list. This utility flushes dirty blocks from the end of the LRU list or flush_list.
NOTE 1: in the case of an LRU flush the calling thread may own latches to NOTE 1: in the case of an LRU flush the calling thread may own latches to
......
...@@ -66,6 +66,14 @@ buf_LRU_get_recent_limit(void); ...@@ -66,6 +66,14 @@ buf_LRU_get_recent_limit(void);
/*==========================*/ /*==========================*/
/* out: the limit; zero if could not determine it */ /* out: the limit; zero if could not determine it */
/********************************************************************** /**********************************************************************
Try to put a block from the LRU list to the free list. */
ibool
buf_LRU_free_block(
/*===============*/
/* out: TRUE if freed */
buf_block_t* block); /* in/out: block to be freed */
/**********************************************************************
Look for a replaceable block from the end of the LRU list and put it to Look for a replaceable block from the end of the LRU list and put it to
the free list if found. */ the free list if found. */
......
...@@ -18,6 +18,11 @@ Created 7/19/1997 Heikki Tuuri ...@@ -18,6 +18,11 @@ Created 7/19/1997 Heikki Tuuri
#include "ibuf0types.h" #include "ibuf0types.h"
#include "fsp0fsp.h" #include "fsp0fsp.h"
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
/* Flag to control insert buffer debugging. */
extern uint ibuf_debug;
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
extern ibuf_t* ibuf; extern ibuf_t* ibuf;
/********************************************************************** /**********************************************************************
......
...@@ -19,6 +19,9 @@ if and only if the record is the first user record on a non-leaf ...@@ -19,6 +19,9 @@ if and only if the record is the first user record on a non-leaf
B-tree page that is the leftmost page on its level B-tree page that is the leftmost page on its level
(PAGE_LEVEL is nonzero and FIL_PAGE_PREV is FIL_NULL). */ (PAGE_LEVEL is nonzero and FIL_PAGE_PREV is FIL_NULL). */
#define REC_INFO_MIN_REC_FLAG 0x10UL #define REC_INFO_MIN_REC_FLAG 0x10UL
/* The deleted flag in info bits */
#define REC_INFO_DELETED_FLAG 0x20UL /* when bit is set to 1, it means the
record has been delete marked */
/* Number of extra bytes in an old-style record, /* Number of extra bytes in an old-style record,
in addition to the data and the offsets */ in addition to the data and the offsets */
......
...@@ -98,9 +98,6 @@ and the shift needed to obtain each bit-field of the record. */ ...@@ -98,9 +98,6 @@ and the shift needed to obtain each bit-field of the record. */
#define REC_INFO_BITS_MASK 0xF0UL #define REC_INFO_BITS_MASK 0xF0UL
#define REC_INFO_BITS_SHIFT 0 #define REC_INFO_BITS_SHIFT 0
/* The deleted flag in info bits */
#define REC_INFO_DELETED_FLAG 0x20UL /* when bit is set to 1, it means the
record has been delete marked */
/* The following masks are used to filter the SQL null bit from /* The following masks are used to filter the SQL null bit from
one-byte and two-byte offsets */ one-byte and two-byte offsets */
......
...@@ -485,6 +485,10 @@ struct mysql_row_templ_struct { ...@@ -485,6 +485,10 @@ struct mysql_row_templ_struct {
Innobase record in the current index; Innobase record in the current index;
not defined if template_type is not defined if template_type is
ROW_MYSQL_WHOLE_ROW */ ROW_MYSQL_WHOLE_ROW */
ulint clust_rec_field_no; /* field number of the column in an
Innobase record in the clustered index;
not defined if template_type is
ROW_MYSQL_WHOLE_ROW */
ulint mysql_col_offset; /* offset of the column in the MySQL ulint mysql_col_offset; /* offset of the column in the MySQL
row format */ row format */
ulint mysql_col_len; /* length of the column in the MySQL ulint mysql_col_len; /* length of the column in the MySQL
......
...@@ -129,9 +129,11 @@ row_upd_changes_field_size_or_external( ...@@ -129,9 +129,11 @@ row_upd_changes_field_size_or_external(
const ulint* offsets,/* in: rec_get_offsets(rec, index) */ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
upd_t* update);/* in: update vector */ upd_t* update);/* in: update vector */
/*************************************************************** /***************************************************************
Replaces the new column values stored in the update vector to the record Replaces the new column values stored in the update vector to the
given. No field size changes are allowed. This function is used only for record given. No field size changes are allowed. This function is
a clustered index */ usually invoked on a clustered index. The only use case for a
secondary index is row_ins_sec_index_entry_by_modify() or its
counterpart in ibuf_insert_to_index_page(). */
void void
row_upd_rec_in_place( row_upd_rec_in_place(
......
...@@ -400,7 +400,7 @@ row_mysql_convert_row_to_innobase( ...@@ -400,7 +400,7 @@ row_mysql_convert_row_to_innobase(
row is used, as row may contain row is used, as row may contain
pointers to this record! */ pointers to this record! */
{ {
mysql_row_templ_t* templ; const mysql_row_templ_t*templ;
dfield_t* dfield; dfield_t* dfield;
ulint i; ulint i;
......
...@@ -2601,20 +2601,21 @@ row_sel_store_mysql_rec( ...@@ -2601,20 +2601,21 @@ row_sel_store_mysql_rec(
row_prebuilt_t* prebuilt, /* in: prebuilt struct */ row_prebuilt_t* prebuilt, /* in: prebuilt struct */
rec_t* rec, /* in: Innobase record in the index rec_t* rec, /* in: Innobase record in the index
which was described in prebuilt's which was described in prebuilt's
template */ template, or in the clustered index;
must be protected by a page latch */
ibool rec_clust, /* in: TRUE if rec is in the clustered
index instead of prebuilt->index */
const ulint* offsets) /* in: array returned by const ulint* offsets) /* in: array returned by
rec_get_offsets() */ rec_get_offsets(rec) */
{ {
mysql_row_templ_t* templ;
mem_heap_t* extern_field_heap = NULL; mem_heap_t* extern_field_heap = NULL;
mem_heap_t* heap; mem_heap_t* heap;
byte* data;
ulint len;
ulint i; ulint i;
ut_ad(prebuilt->mysql_template); ut_ad(prebuilt->mysql_template);
ut_ad(prebuilt->default_rec); ut_ad(prebuilt->default_rec);
ut_ad(rec_offs_validate(rec, NULL, offsets)); ut_ad(rec_offs_validate(rec, NULL, offsets));
ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) { if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) {
mem_heap_free(prebuilt->blob_heap); mem_heap_free(prebuilt->blob_heap);
...@@ -2623,10 +2624,15 @@ row_sel_store_mysql_rec( ...@@ -2623,10 +2624,15 @@ row_sel_store_mysql_rec(
for (i = 0; i < prebuilt->n_template; i++) { for (i = 0; i < prebuilt->n_template; i++) {
templ = prebuilt->mysql_template + i; const mysql_row_templ_t*templ = prebuilt->mysql_template + i;
byte* data;
ulint len;
ulint field_no;
if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets, field_no = rec_clust
templ->rec_field_no))) { ? templ->clust_rec_field_no : templ->rec_field_no;
if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets, field_no))) {
/* Copy an externally stored field to the temporary /* Copy an externally stored field to the temporary
heap */ heap */
...@@ -2652,15 +2658,13 @@ row_sel_store_mysql_rec( ...@@ -2652,15 +2658,13 @@ row_sel_store_mysql_rec(
causes an assert */ causes an assert */
data = btr_rec_copy_externally_stored_field( data = btr_rec_copy_externally_stored_field(
rec, offsets, templ->rec_field_no, rec, offsets, field_no, &len, heap);
&len, heap);
ut_a(len != UNIV_SQL_NULL); ut_a(len != UNIV_SQL_NULL);
} else { } else {
/* Field is stored in the row. */ /* Field is stored in the row. */
data = rec_get_nth_field(rec, offsets, data = rec_get_nth_field(rec, offsets, field_no, &len);
templ->rec_field_no, &len);
if (UNIV_UNLIKELY(templ->type == DATA_BLOB) if (UNIV_UNLIKELY(templ->type == DATA_BLOB)
&& len != UNIV_SQL_NULL) { && len != UNIV_SQL_NULL) {
...@@ -3019,7 +3023,7 @@ row_sel_pop_cached_row_for_mysql( ...@@ -3019,7 +3023,7 @@ row_sel_pop_cached_row_for_mysql(
row_prebuilt_t* prebuilt) /* in: prebuilt struct */ row_prebuilt_t* prebuilt) /* in: prebuilt struct */
{ {
ulint i; ulint i;
mysql_row_templ_t* templ; const mysql_row_templ_t*templ;
byte* cached_rec; byte* cached_rec;
ut_ad(prebuilt->n_fetch_cached > 0); ut_ad(prebuilt->n_fetch_cached > 0);
ut_ad(prebuilt->mysql_prefix_len <= prebuilt->mysql_row_len); ut_ad(prebuilt->mysql_prefix_len <= prebuilt->mysql_row_len);
...@@ -3075,14 +3079,19 @@ void ...@@ -3075,14 +3079,19 @@ void
row_sel_push_cache_row_for_mysql( row_sel_push_cache_row_for_mysql(
/*=============================*/ /*=============================*/
row_prebuilt_t* prebuilt, /* in: prebuilt struct */ row_prebuilt_t* prebuilt, /* in: prebuilt struct */
rec_t* rec, /* in: record to push */ rec_t* rec, /* in: Innobase record in the index
const ulint* offsets) /* in: rec_get_offsets() */ which was described in prebuilt's
template, or in the clustered index */
ibool rec_clust, /* in: TRUE if rec is in the clustered
index instead of prebuilt->index */
const ulint* offsets) /* in: rec_get_offsets(rec) */
{ {
byte* buf; byte* buf;
ulint i; ulint i;
ut_ad(prebuilt->n_fetch_cached < MYSQL_FETCH_CACHE_SIZE); ut_ad(prebuilt->n_fetch_cached < MYSQL_FETCH_CACHE_SIZE);
ut_ad(rec_offs_validate(rec, NULL, offsets)); ut_ad(rec_offs_validate(rec, NULL, offsets));
ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
ut_a(!prebuilt->templ_contains_blob); ut_a(!prebuilt->templ_contains_blob);
if (prebuilt->fetch_cache[0] == NULL) { if (prebuilt->fetch_cache[0] == NULL) {
...@@ -3111,7 +3120,7 @@ row_sel_push_cache_row_for_mysql( ...@@ -3111,7 +3120,7 @@ row_sel_push_cache_row_for_mysql(
if (UNIV_UNLIKELY(!row_sel_store_mysql_rec( if (UNIV_UNLIKELY(!row_sel_store_mysql_rec(
prebuilt->fetch_cache[ prebuilt->fetch_cache[
prebuilt->n_fetch_cached], prebuilt->n_fetch_cached],
prebuilt, rec, offsets))) { prebuilt, rec, rec_clust, offsets))) {
ut_error; ut_error;
} }
...@@ -3500,7 +3509,8 @@ row_search_for_mysql( ...@@ -3500,7 +3509,8 @@ row_search_for_mysql(
rec, offsets)); rec, offsets));
#endif #endif
if (!row_sel_store_mysql_rec(buf, prebuilt, if (!row_sel_store_mysql_rec(buf, prebuilt,
rec, offsets)) { rec, FALSE,
offsets)) {
err = DB_TOO_BIG_RECORD; err = DB_TOO_BIG_RECORD;
/* We let the main loop to do the /* We let the main loop to do the
...@@ -4233,19 +4243,8 @@ requires_clust_rec: ...@@ -4233,19 +4243,8 @@ requires_clust_rec:
goto next_rec; goto next_rec;
} }
if (prebuilt->need_to_access_clustered) {
result_rec = clust_rec; result_rec = clust_rec;
ut_ad(rec_offs_validate(result_rec, clust_index, offsets));
ut_ad(rec_offs_validate(result_rec, clust_index,
offsets));
} else {
/* We used 'offsets' for the clust rec, recalculate
them for 'rec' */
offsets = rec_get_offsets(rec, index, offsets,
ULINT_UNDEFINED, &heap);
result_rec = rec;
}
} else { } else {
result_rec = rec; result_rec = rec;
} }
...@@ -4256,6 +4255,7 @@ requires_clust_rec: ...@@ -4256,6 +4255,7 @@ requires_clust_rec:
ut_ad(rec_offs_validate(result_rec, ut_ad(rec_offs_validate(result_rec,
result_rec != rec ? clust_index : index, result_rec != rec ? clust_index : index,
offsets)); offsets));
ut_ad(!rec_get_deleted_flag(result_rec, comp));
if ((match_mode == ROW_SEL_EXACT if ((match_mode == ROW_SEL_EXACT
|| prebuilt->n_rows_fetched >= MYSQL_FETCH_CACHE_THRESHOLD) || prebuilt->n_rows_fetched >= MYSQL_FETCH_CACHE_THRESHOLD)
...@@ -4276,7 +4276,7 @@ requires_clust_rec: ...@@ -4276,7 +4276,7 @@ requires_clust_rec:
cursor. */ cursor. */
row_sel_push_cache_row_for_mysql(prebuilt, result_rec, row_sel_push_cache_row_for_mysql(prebuilt, result_rec,
offsets); result_rec != rec, offsets);
if (prebuilt->n_fetch_cached == MYSQL_FETCH_CACHE_SIZE) { if (prebuilt->n_fetch_cached == MYSQL_FETCH_CACHE_SIZE) {
goto got_row; goto got_row;
...@@ -4284,15 +4284,31 @@ requires_clust_rec: ...@@ -4284,15 +4284,31 @@ requires_clust_rec:
goto next_rec; goto next_rec;
} else { } else {
if (prebuilt->template_type == ROW_MYSQL_DUMMY_TEMPLATE) { if (UNIV_UNLIKELY
(prebuilt->template_type == ROW_MYSQL_DUMMY_TEMPLATE)) {
/* CHECK TABLE: fetch the row */
if (result_rec != rec
&& !prebuilt->need_to_access_clustered) {
/* We used 'offsets' for the clust
rec, recalculate them for 'rec' */
offsets = rec_get_offsets(rec, index, offsets,
ULINT_UNDEFINED,
&heap);
result_rec = rec;
}
memcpy(buf + 4, result_rec memcpy(buf + 4, result_rec
- rec_offs_extra_size(offsets), - rec_offs_extra_size(offsets),
rec_offs_size(offsets)); rec_offs_size(offsets));
mach_write_to_4(buf, mach_write_to_4(buf,
rec_offs_extra_size(offsets) + 4); rec_offs_extra_size(offsets) + 4);
} else { } else {
if (!row_sel_store_mysql_rec(buf, prebuilt, /* Returning a row to MySQL */
result_rec, offsets)) {
if (!row_sel_store_mysql_rec(buf, prebuilt, result_rec,
result_rec != rec,
offsets)) {
err = DB_TOO_BIG_RECORD; err = DB_TOO_BIG_RECORD;
goto lock_wait_or_error; goto lock_wait_or_error;
......
...@@ -430,9 +430,11 @@ row_upd_changes_field_size_or_external( ...@@ -430,9 +430,11 @@ row_upd_changes_field_size_or_external(
} }
/*************************************************************** /***************************************************************
Replaces the new column values stored in the update vector to the record Replaces the new column values stored in the update vector to the
given. No field size changes are allowed. This function is used only for record given. No field size changes are allowed. This function is
a clustered index */ usually invoked on a clustered index. The only use case for a
secondary index is row_ins_sec_index_entry_by_modify() or its
counterpart in ibuf_insert_to_index_page(). */
void void
row_upd_rec_in_place( row_upd_rec_in_place(
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment