Commit 95fa7fab authored by Jimmy Yang's avatar Jimmy Yang

Fix bug #11830883, SUPPORT "CORRUPTED" BIT FOR INNODB TABLES AND INDEXES.

Also addressed issues in bug #11745133: when a corrupted buffer/page is
found in a table that was created with innodb_file_per_table on, we can now
mark the table as corrupted instead of crashing the server.
parent 887ac677
...@@ -447,7 +447,8 @@ enum ha_base_keytype { ...@@ -447,7 +447,8 @@ enum ha_base_keytype {
#define HA_ERR_WRONG_CRC 176 /* Wrong CRC on page */ #define HA_ERR_WRONG_CRC 176 /* Wrong CRC on page */
#define HA_ERR_TOO_MANY_CONCURRENT_TRXS 177 /*Too many active concurrent transactions */ #define HA_ERR_TOO_MANY_CONCURRENT_TRXS 177 /*Too many active concurrent transactions */
#define HA_ERR_INDEX_COL_TOO_LONG 178 /* Index column length exceeds limit */ #define HA_ERR_INDEX_COL_TOO_LONG 178 /* Index column length exceeds limit */
#define HA_ERR_LAST 178 /* Copy of last error nr */ #define HA_ERR_INDEX_CORRUPT 179 /* Index corrupted */
#define HA_ERR_LAST 179 /* Copy of last error nr */
/* Number of different errors */ /* Number of different errors */
#define HA_ERR_ERRORS (HA_ERR_LAST - HA_ERR_FIRST + 1) #define HA_ERR_ERRORS (HA_ERR_LAST - HA_ERR_FIRST + 1)
......
set names utf8;
CREATE TABLE corrupt_bit_test_ā(
a INT AUTO_INCREMENT PRIMARY KEY,
b CHAR(100),
c INT,
z INT,
INDEX(b))
ENGINE=InnoDB;
INSERT INTO corrupt_bit_test_ā VALUES(0,'x',1, 1);
CREATE UNIQUE INDEX idxā ON corrupt_bit_test_ā(c, b);
CREATE UNIQUE INDEX idxē ON corrupt_bit_test_ā(z, b);
SELECT * FROM corrupt_bit_test_ā;
a b c z
1 x 1 1
select @@unique_checks;
@@unique_checks
0
select @@innodb_change_buffering_debug;
@@innodb_change_buffering_debug
1
INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+1,z+1 FROM corrupt_bit_test_ā;
INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+10,z+10 FROM corrupt_bit_test_ā;
INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+20,z+20 FROM corrupt_bit_test_ā;
INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+50,z+50 FROM corrupt_bit_test_ā;
INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+100,z+100 FROM corrupt_bit_test_ā;
INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+200,z+200 FROM corrupt_bit_test_ā;
INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+400,z+400 FROM corrupt_bit_test_ā;
INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+800,z+800 FROM corrupt_bit_test_ā;
INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+1600,z+1600 FROM corrupt_bit_test_ā;
INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+4000,z+4000 FROM corrupt_bit_test_ā;
select count(*) from corrupt_bit_test_ā;
count(*)
1024
CREATE INDEX idx3 ON corrupt_bit_test_ā(b, c);
INSERT INTO corrupt_bit_test_ā VALUES(13000,'x',1,1);
CREATE INDEX idx4 ON corrupt_bit_test_ā(b, z);
check table corrupt_bit_test_ā;
Table Op Msg_type Msg_text
test.corrupt_bit_test_ā check Warning InnoDB: The B-tree of index "idxā" is corrupted.
test.corrupt_bit_test_ā check Warning InnoDB: The B-tree of index "idxē" is corrupted.
test.corrupt_bit_test_ā check error Corrupt
select c from corrupt_bit_test_ā;
ERROR HY000: Incorrect key file for table 'corrupt_bit_test_ā'; try to repair it
select z from corrupt_bit_test_ā;
ERROR HY000: Incorrect key file for table 'corrupt_bit_test_ā'; try to repair it
show warnings;
Level Code Message
Warning 179 InnoDB: Index "idxē" for table "test/corrupt_bit_test_@1s" is marked as corrupted
Error 1034 Incorrect key file for table 'corrupt_bit_test_ā'; try to repair it
insert into corrupt_bit_test_ā values (10001, "a", 20001, 20001);
select * from corrupt_bit_test_ā use index(primary) where a = 10001;
a b c z
10001 a 20001 20001
begin;
insert into corrupt_bit_test_ā values (10002, "a", 20002, 20002);
delete from corrupt_bit_test_ā where a = 10001;
insert into corrupt_bit_test_ā values (10001, "a", 20001, 20001);
rollback;
drop index idxā on corrupt_bit_test_ā;
check table corrupt_bit_test_ā;
Table Op Msg_type Msg_text
test.corrupt_bit_test_ā check Warning InnoDB: Index "idxē" is marked as corrupted
test.corrupt_bit_test_ā check error Corrupt
set names utf8;
select z from corrupt_bit_test_ā;
ERROR HY000: Incorrect key file for table 'corrupt_bit_test_ā'; try to repair it
drop index idxē on corrupt_bit_test_ā;
select z from corrupt_bit_test_ā limit 10;
z
20001
1
1
2
11
12
21
22
31
32
drop table corrupt_bit_test_ā;
SET GLOBAL innodb_change_buffering_debug = 0;
#
# Test for persistent corrupt bit for corrupted index and table
#
-- source include/have_innodb.inc
# This test needs debug server
--source include/have_debug.inc
-- disable_query_log
# This test setup is extracted from bug56680.test:
# The flag innodb_change_buffering_debug is only available in debug builds.
# It instructs InnoDB to try to evict pages from the buffer pool when
# change buffering is possible, so that the change buffer will be used
# whenever possible.
-- error 0,ER_UNKNOWN_SYSTEM_VARIABLE
SET @innodb_change_buffering_debug_orig = @@innodb_change_buffering_debug;
-- error 0,ER_UNKNOWN_SYSTEM_VARIABLE
SET GLOBAL innodb_change_buffering_debug = 1;
# Turn off Unique Check to create corrupted index with dup key
SET UNIQUE_CHECKS=0;
-- enable_query_log
set names utf8;
CREATE TABLE corrupt_bit_test_ā(
a INT AUTO_INCREMENT PRIMARY KEY,
b CHAR(100),
c INT,
z INT,
INDEX(b))
ENGINE=InnoDB;
INSERT INTO corrupt_bit_test_ā VALUES(0,'x',1, 1);
# This is the first unique index we intend to corrupt
CREATE UNIQUE INDEX idxā ON corrupt_bit_test_ā(c, b);
# This is the second unique index we intend to corrupt
CREATE UNIQUE INDEX idxē ON corrupt_bit_test_ā(z, b);
SELECT * FROM corrupt_bit_test_ā;
select @@unique_checks;
select @@innodb_change_buffering_debug;
# Create enough rows for the table, so that the insert buffer will be
# used for modifying the secondary index page. There must be multiple
# index pages, because changes to the root page are never buffered.
INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+1,z+1 FROM corrupt_bit_test_ā;
INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+10,z+10 FROM corrupt_bit_test_ā;
INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+20,z+20 FROM corrupt_bit_test_ā;
INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+50,z+50 FROM corrupt_bit_test_ā;
INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+100,z+100 FROM corrupt_bit_test_ā;
INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+200,z+200 FROM corrupt_bit_test_ā;
INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+400,z+400 FROM corrupt_bit_test_ā;
INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+800,z+800 FROM corrupt_bit_test_ā;
INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+1600,z+1600 FROM corrupt_bit_test_ā;
INSERT INTO corrupt_bit_test_ā SELECT 0,b,c+4000,z+4000 FROM corrupt_bit_test_ā;
select count(*) from corrupt_bit_test_ā;
CREATE INDEX idx3 ON corrupt_bit_test_ā(b, c);
# Create a dup key error on index "idxē" and "idxā" by inserting a dup value
INSERT INTO corrupt_bit_test_ā VALUES(13000,'x',1,1);
# creating an index should succeed even if other secondary indexes are corrupted
CREATE INDEX idx4 ON corrupt_bit_test_ā(b, z);
# CHECK TABLE will find the unique indexes corrupted
# by the duplicate key
check table corrupt_bit_test_ā;
# This select intends to use the corrupted index. Expect it to fail
-- error ER_NOT_KEYFILE
select c from corrupt_bit_test_ā;
-- error ER_NOT_KEYFILE
select z from corrupt_bit_test_ā;
show warnings;
# Since corrupted index is a secondary index, we only disable such
# index and allow other DML to proceed
insert into corrupt_bit_test_ā values (10001, "a", 20001, 20001);
# This does not use the corrupted index, expect to succeed
select * from corrupt_bit_test_ā use index(primary) where a = 10001;
# Some more DMLs
begin;
insert into corrupt_bit_test_ā values (10002, "a", 20002, 20002);
delete from corrupt_bit_test_ā where a = 10001;
insert into corrupt_bit_test_ā values (10001, "a", 20001, 20001);
rollback;
# Drop one corrupted index before reboot
drop index idxā on corrupt_bit_test_ā;
check table corrupt_bit_test_ā;
# Shut down the server
-- exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
-- shutdown_server 20
-- source include/wait_until_disconnected.inc
# Restart the server
-- disable_query_log
--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
--enable_reconnect
--source include/wait_until_connected_again.inc
--disable_reconnect
-- enable_query_log
set names utf8;
# The index is marked as corrupted in SYS_INDEXES too, so after the server
# restart, an attempt to use the index will still fail.
-- error ER_NOT_KEYFILE
select z from corrupt_bit_test_ā;
# Drop the corrupted index
drop index idxē on corrupt_bit_test_ā;
# Now the select works normally again
select z from corrupt_bit_test_ā limit 10;
# Drop table
drop table corrupt_bit_test_ā;
-- error 0, ER_UNKNOWN_SYSTEM_VARIABLE
SET GLOBAL innodb_change_buffering_debug = 0;
...@@ -11,7 +11,9 @@ There should be *no* long test name listed below: ...@@ -11,7 +11,9 @@ There should be *no* long test name listed below:
select variable_name as `There should be *no* variables listed below:` from t2 select variable_name as `There should be *no* variables listed below:` from t2
left join t1 on variable_name=test_name where test_name is null; left join t1 on variable_name=test_name where test_name is null;
There should be *no* variables listed below: There should be *no* variables listed below:
INNODB_FORCE_LOAD_CORRUPTED
INNODB_LARGE_PREFIX INNODB_LARGE_PREFIX
INNODB_FORCE_LOAD_CORRUPTED
INNODB_LARGE_PREFIX INNODB_LARGE_PREFIX
drop table t1; drop table t1;
drop table t2; drop table t2;
...@@ -81,7 +81,8 @@ static const char *handler_error_messages[]= ...@@ -81,7 +81,8 @@ static const char *handler_error_messages[]=
"File to short; Expected more data in file", "File to short; Expected more data in file",
"Read page with wrong checksum", "Read page with wrong checksum",
"Too many active concurrent transactions", "Too many active concurrent transactions",
"Index column length exceeds limit" "Index column length exceeds limit",
"Index corrupted"
}; };
extern void my_handler_error_register(void); extern void my_handler_error_register(void);
......
...@@ -358,6 +358,7 @@ int ha_init_errors(void) ...@@ -358,6 +358,7 @@ int ha_init_errors(void)
SETMSG(HA_ERR_AUTOINC_ERANGE, ER_DEFAULT(ER_WARN_DATA_OUT_OF_RANGE)); SETMSG(HA_ERR_AUTOINC_ERANGE, ER_DEFAULT(ER_WARN_DATA_OUT_OF_RANGE));
SETMSG(HA_ERR_TOO_MANY_CONCURRENT_TRXS, ER_DEFAULT(ER_TOO_MANY_CONCURRENT_TRXS)); SETMSG(HA_ERR_TOO_MANY_CONCURRENT_TRXS, ER_DEFAULT(ER_TOO_MANY_CONCURRENT_TRXS));
SETMSG(HA_ERR_INDEX_COL_TOO_LONG, ER_DEFAULT(ER_INDEX_COLUMN_TOO_LONG)); SETMSG(HA_ERR_INDEX_COL_TOO_LONG, ER_DEFAULT(ER_INDEX_COLUMN_TOO_LONG));
SETMSG(HA_ERR_INDEX_CORRUPT, ER_DEFAULT(ER_INDEX_CORRUPT));
/* Register the error messages for use with my_error(). */ /* Register the error messages for use with my_error(). */
return my_error_register(get_handler_errmsgs, HA_ERR_FIRST, HA_ERR_LAST); return my_error_register(get_handler_errmsgs, HA_ERR_FIRST, HA_ERR_LAST);
...@@ -2865,6 +2866,9 @@ void handler::print_error(int error, myf errflag) ...@@ -2865,6 +2866,9 @@ void handler::print_error(int error, myf errflag)
case HA_ERR_INDEX_COL_TOO_LONG: case HA_ERR_INDEX_COL_TOO_LONG:
textno= ER_INDEX_COLUMN_TOO_LONG; textno= ER_INDEX_COLUMN_TOO_LONG;
break; break;
case HA_ERR_INDEX_CORRUPT:
textno= ER_INDEX_CORRUPT;
break;
default: default:
{ {
/* The error was "unknown" to this function. /* The error was "unknown" to this function.
......
...@@ -6415,3 +6415,5 @@ ER_ERROR_IN_TRIGGER_BODY ...@@ -6415,3 +6415,5 @@ ER_ERROR_IN_TRIGGER_BODY
ER_ERROR_IN_UNKNOWN_TRIGGER_BODY ER_ERROR_IN_UNKNOWN_TRIGGER_BODY
eng "Unknown trigger has an error in its body: '%-.256s'" eng "Unknown trigger has an error in its body: '%-.256s'"
ER_INDEX_CORRUPT
eng "Index %s is corrupted"
...@@ -3473,6 +3473,55 @@ buf_page_create( ...@@ -3473,6 +3473,55 @@ buf_page_create(
return(block); return(block);
} }
/********************************************************************//**
Mark a table with the specified space pointed by bpage->space corrupted.
Also remove the bpage from LRU list.
The page is expected to be io-fixed for read (BUF_IO_READ) with a zero
buf_fix_count on entry; both conditions are checked by debug assertions
only. Regardless of the outcome, the io-fix is reset to BUF_IO_NONE, the
block x-lock is released for uncompressed pages and
buf_pool->n_pend_reads is decremented. The page is removed from the LRU
list only when dict_set_corrupted_by_space() found a table to mark.
@return TRUE if successful, FALSE if no table was marked corrupted */
static
ibool
buf_mark_space_corrupt(
/*===================*/
buf_page_t* bpage) /*!< in: pointer to the block in question */
{
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
/* TRUE when the page state is BUF_BLOCK_FILE_PAGE; only then is bpage
cast to buf_block_t below to release its lock */
const ibool uncompressed = (buf_page_get_state(bpage)
== BUF_BLOCK_FILE_PAGE);
/* Page identity copied up front; it is used for the lookup and for the
debug assertion that the identity did not change in between */
ulint space = bpage->space;
ulint offset = bpage->offset;
ibool ret = TRUE;
/* First unfix and release lock on the bpage */
/* The buffer pool mutex is taken before the page mutex and both are held
until the end of the function */
buf_pool_mutex_enter(buf_pool);
mutex_enter(buf_page_get_mutex(bpage));
ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_READ);
ut_ad(bpage->buf_fix_count == 0);
/* Set BUF_IO_NONE before we remove the block from LRU list */
buf_page_set_io_fix(bpage, BUF_IO_NONE);
if (uncompressed) {
/* Release the x-lock that was registered with the BUF_IO_READ pass
value (presumably taken when the read was posted -- TODO confirm) */
rw_lock_x_unlock_gen(
&((buf_block_t*) bpage)->lock,
BUF_IO_READ);
}
/* Find the table with specified space id, and mark it corrupted */
if (dict_set_corrupted_by_space(space)) {
ut_ad(bpage->space == space && bpage->offset == offset);
buf_LRU_free_one_page(bpage);
} else {
/* No table was marked; the page stays in the LRU list and the caller
decides how to proceed */
ret = FALSE;
}
/* The read that brought this page in is no longer pending */
ut_ad(buf_pool->n_pend_reads > 0);
buf_pool->n_pend_reads--;
/* NOTE(review): the page mutex is looked up again from bpage after the
page may have been freed above -- confirm that buf_page_get_mutex()
still yields the mutex acquired at the top of the function */
mutex_exit(buf_page_get_mutex(bpage));
buf_pool_mutex_exit(buf_pool);
return(ret);
}
/********************************************************************//** /********************************************************************//**
Completes an asynchronous read or write request of a file page to or from Completes an asynchronous read or write request of a file page to or from
the buffer pool. */ the buffer pool. */
...@@ -3598,10 +3647,19 @@ buf_page_io_complete( ...@@ -3598,10 +3647,19 @@ buf_page_io_complete(
"InnoDB: about forcing recovery.\n", stderr); "InnoDB: about forcing recovery.\n", stderr);
if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) { if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
fputs("InnoDB: Ending processing because of" /* If page space id is larger than TRX_SYS_SPACE
(0), we will attempt to mark the corresponding
table as corrupted instead of crashing server */
if (bpage->space > TRX_SYS_SPACE
&& buf_mark_space_corrupt(bpage)) {
return;
} else {
fputs("InnoDB: Ending processing"
" because of"
" a corrupt database page.\n", " a corrupt database page.\n",
stderr); stderr);
exit(1); ut_error;
}
} }
} }
......
...@@ -1885,6 +1885,22 @@ buf_LRU_block_free_hashed_page( ...@@ -1885,6 +1885,22 @@ buf_LRU_block_free_hashed_page(
buf_LRU_block_free_non_file_page(block); buf_LRU_block_free_non_file_page(block);
} }
/******************************************************************//**
Take a single page out of the LRU list and hand its block over to the
free list. The page is first removed through
buf_LRU_block_remove_hashed_page(); unless that call reports
BUF_BLOCK_ZIP_FREE, the block is then released with
buf_LRU_block_free_hashed_page(). */
UNIV_INTERN
void
buf_LRU_free_one_page(
/*==================*/
	buf_page_t*	bpage)	/*!< in/out: block, must contain a file
				page and be in a state where it can be
				freed; there may or may not be a hash
				index to the page */
{
	if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
	    == BUF_BLOCK_ZIP_FREE) {
		/* Nothing is left to put on the free list in this case. */
		return;
	}

	buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
}
/**********************************************************************//** /**********************************************************************//**
Updates buf_pool->LRU_old_ratio for one buffer pool instance. Updates buf_pool->LRU_old_ratio for one buffer pool instance.
@return updated old_pct */ @return updated old_pct */
......
...@@ -54,6 +54,7 @@ UNIV_INTERN dict_index_t* dict_ind_compact; ...@@ -54,6 +54,7 @@ UNIV_INTERN dict_index_t* dict_ind_compact;
#include "row0merge.h" #include "row0merge.h"
#include "m_ctype.h" /* my_isspace() */ #include "m_ctype.h" /* my_isspace() */
#include "ha_prototypes.h" /* innobase_strcasecmp(), innobase_casedn_str()*/ #include "ha_prototypes.h" /* innobase_strcasecmp(), innobase_casedn_str()*/
#include "row0upd.h"
#include <ctype.h> #include <ctype.h>
...@@ -611,8 +612,7 @@ dict_table_get_on_id( ...@@ -611,8 +612,7 @@ dict_table_get_on_id(
{ {
dict_table_t* table; dict_table_t* table;
if (table_id <= DICT_FIELDS_ID if (trx->dict_operation_lock_mode == RW_X_LATCH) {
|| trx->dict_operation_lock_mode == RW_X_LATCH) {
/* Note: An X latch implies that the transaction /* Note: An X latch implies that the transaction
already owns the dictionary mutex. */ already owns the dictionary mutex. */
...@@ -5046,4 +5046,179 @@ dict_close(void) ...@@ -5046,4 +5046,179 @@ dict_close(void)
rw_lock_free(&dict_table_stats_latches[i]); rw_lock_free(&dict_table_stats_latches[i]);
} }
} }
/**********************************************************************//**
Scan dict_sys->table_LRU for a table whose tablespace id equals space_id.
The scan visits at most as many entries as the list length read at the
start of the scan.
@return table if found, NULL if not */
static
dict_table_t*
dict_find_table_by_space(
/*=====================*/
	ulint	space_id)	/*!< in: space ID */
{
	dict_table_t*	candidate;
	ulint		remaining;

	ut_ad(space_id > 0);

	candidate = UT_LIST_GET_FIRST(dict_sys->table_LRU);
	remaining = UT_LIST_GET_LEN(dict_sys->table_LRU);

	/* No mutex is acquired here on purpose: this lookup is a last
	resort used by error handling code deep in the call stack to avoid
	killing the server, so the risk of an unprotected traversal is
	accepted. */

	for (; candidate != NULL && remaining > 0; remaining--) {

		if (candidate->space == space_id) {

			return(candidate);
		}

		candidate = UT_LIST_GET_NEXT(table_LRU, candidate);
	}

	return(NULL);
}
/**********************************************************************//**
Set the "corrupted" flag, in the data dictionary cache only, on the table
that dict_find_table_by_space() returns for the given space id.
@return TRUE if a table was found and flagged, FALSE otherwise */
UNIV_INTERN
ibool
dict_set_corrupted_by_space(
/*========================*/
	ulint	space_id)	/*!< in: space ID */
{
	dict_table_t*	found = dict_find_table_by_space(space_id);

	if (found != NULL) {
		/* Only table->corrupted is set: the caller may be too
		deep in the stack for a SYS_INDEXES update. */
		found->corrupted = TRUE;

		return(TRUE);
	}

	return(FALSE);
}
/**********************************************************************//**
Flags an index corrupted both in the data dictionary cache
and in the SYS_INDEXES.

The caller must own dict_sys->mutex (debug-asserted). If the index is the
clustered index, the owning table is flagged corrupted as well. If the
index is already reported corrupted by dict_index_is_corrupted(), nothing
else is done. Otherwise DICT_CORRUPT is set in index->type and the TYPE
field of the matching SYS_INDEXES record is overwritten inside a
mini-transaction. The outcome ("Flagged" / "Unable to flag") is printed
to stderr together with the index name. */
UNIV_INTERN
void
dict_set_corrupted(
/*===============*/
	dict_index_t*	index)	/*!< in/out: index */
{
	mem_heap_t*	heap;
	mtr_t		mtr;
	dict_index_t*	sys_index;
	dtuple_t*	tuple;
	dfield_t*	dfield;
	byte*		buf;
	const char*	status;
	btr_cur_t	cursor;

	ut_ad(index);
	ut_ad(mutex_own(&dict_sys->mutex));
	ut_ad(!dict_table_is_comp(dict_sys->sys_tables));
	ut_ad(!dict_table_is_comp(dict_sys->sys_indexes));

#ifdef UNIV_SYNC_DEBUG
	ut_ad(sync_thread_levels_empty_except_dict());
#endif

	/* Mark the table as corrupted only if the clustered index
	is corrupted */
	if (dict_index_is_clust(index)) {
		index->table->corrupted = TRUE;
	}

	if (UNIV_UNLIKELY(dict_index_is_corrupted(index))) {
		/* The index was already flagged corrupted. The table flag
		is only guaranteed for a clustered index (it was set just
		above); a secondary index may be flagged while its table is
		not, so the assertion must not fire for it. */
		ut_ad(!dict_index_is_clust(index)
		      || index->table->corrupted);
		return;
	}

	heap = mem_heap_create(sizeof(dtuple_t) + 2 * (sizeof(dfield_t)
			       + sizeof(que_fork_t) + sizeof(upd_node_t)
			       + sizeof(upd_t) + 12));
	mtr_start(&mtr);

	/* Flag the cached index first; the same value is written to
	SYS_INDEXES below. */
	index->type |= DICT_CORRUPT;

	sys_index = UT_LIST_GET_FIRST(dict_sys->sys_indexes->indexes);

	/* Find the index row in SYS_INDEXES: the search tuple is
	(TABLE_ID, INDEX_ID), each stored as 8 bytes. */
	tuple = dtuple_create(heap, 2);

	dfield = dtuple_get_nth_field(tuple, 0);
	buf = mem_heap_alloc(heap, 8);
	mach_write_to_8(buf, index->table->id);
	dfield_set_data(dfield, buf, 8);

	dfield = dtuple_get_nth_field(tuple, 1);
	buf = mem_heap_alloc(heap, 8);
	mach_write_to_8(buf, index->id);
	dfield_set_data(dfield, buf, 8);

	dict_index_copy_types(tuple, sys_index, 2);

	btr_cur_search_to_nth_level(sys_index, 0, tuple, PAGE_CUR_GE,
				    BTR_MODIFY_LEAF,
				    &cursor, 0, __FILE__, __LINE__, &mtr);

	/* Assume failure until the TYPE field has actually been written. */
	status = "  InnoDB: Unable to flag corruption of ";

	if (cursor.up_match == dtuple_get_n_fields(tuple)) {
		/* UPDATE SYS_INDEXES SET TYPE=index->type
		WHERE TABLE_ID=index->table->id AND INDEX_ID=index->id */
		ulint	len;
		byte*	field	= rec_get_nth_field_old(
			btr_cur_get_rec(&cursor),
			DICT_SYS_INDEXES_TYPE_FIELD, &len);

		/* Only a 4-byte TYPE field is overwritten; any other
		length is reported as a failure to flag. */
		if (len == 4) {
			mlog_write_ulint(field, index->type,
					 MLOG_4BYTES, &mtr);
			status = "  InnoDB: Flagged corruption of ";
		}
	}

	mtr_commit(&mtr);
	mem_heap_free(heap);

	ut_print_timestamp(stderr);
	fputs(status, stderr);
	dict_index_name_print(stderr, NULL, index);
	putc('\n', stderr);
}
/**********************************************************************//**
Flag an index as corrupted in the data dictionary cache only; nothing is
written to SYS_INDEXES. Intended mainly for the case where the index's own
dictionary metadata is corrupted and the index is force-loaded so that it
can be repaired. The caller must own dict_sys->mutex (debug-asserted). */
UNIV_INTERN
void
dict_set_corrupted_index_cache_only(
/*================================*/
	dict_index_t*	index)	/*!< in/out: index */
{
	dict_table_t*	owner;

	ut_ad(index);
	ut_ad(mutex_own(&dict_sys->mutex));
	ut_ad(!dict_table_is_comp(dict_sys->sys_tables));
	ut_ad(!dict_table_is_comp(dict_sys->sys_indexes));

	owner = index->table;

	/* The table as a whole is flagged only when the corrupted index
	is the clustered one. */
	if (dict_index_is_clust(index)) {
		owner->corrupted = TRUE;
	}

	index->type |= DICT_CORRUPT;
}
#endif /* !UNIV_HOTBACKUP */ #endif /* !UNIV_HOTBACKUP */
...@@ -52,6 +52,11 @@ static const char* SYSTEM_TABLE_NAME[] = { ...@@ -52,6 +52,11 @@ static const char* SYSTEM_TABLE_NAME[] = {
"SYS_FOREIGN", "SYS_FOREIGN",
"SYS_FOREIGN_COLS" "SYS_FOREIGN_COLS"
}; };
/* If this flag is TRUE, then we will load the clustered index's (and the
table's) metadata even if it is marked as "corrupted". */
UNIV_INTERN my_bool srv_load_corrupted = FALSE;
/****************************************************************//** /****************************************************************//**
Compare the name of an index column. Compare the name of an index column.
@return TRUE if the i'th column of index is 'name'. */ @return TRUE if the i'th column of index is 'name'. */
...@@ -1324,6 +1329,9 @@ dict_load_index_low( ...@@ -1324,6 +1329,9 @@ dict_load_index_low(
goto err_len; goto err_len;
} }
type = mach_read_from_4(field); type = mach_read_from_4(field);
if (UNIV_UNLIKELY(type & (~0 << DICT_IT_BITS))) {
return("unknown SYS_INDEXES.TYPE bits");
}
field = rec_get_nth_field_old(rec, 7/*SPACE*/, &len); field = rec_get_nth_field_old(rec, 7/*SPACE*/, &len);
if (UNIV_UNLIKELY(len != 4)) { if (UNIV_UNLIKELY(len != 4)) {
...@@ -1423,16 +1431,47 @@ dict_load_indexes( ...@@ -1423,16 +1431,47 @@ dict_load_indexes(
goto next_rec; goto next_rec;
} else if (err_msg) { } else if (err_msg) {
fprintf(stderr, "InnoDB: %s\n", err_msg); fprintf(stderr, "InnoDB: %s\n", err_msg);
if (ignore_err & DICT_ERR_IGNORE_CORRUPT) {
goto next_rec;
}
error = DB_CORRUPTION; error = DB_CORRUPTION;
goto func_exit; goto func_exit;
} }
ut_ad(index); ut_ad(index);
/* Check whether the index is corrupted */
if (dict_index_is_corrupted(index)) {
ut_print_timestamp(stderr);
fputs(" InnoDB: ", stderr);
dict_index_name_print(stderr, NULL, index);
fputs(" is corrupted\n", stderr);
if (!srv_load_corrupted
&& !(ignore_err & DICT_ERR_IGNORE_CORRUPT)
&& dict_index_is_clust(index)) {
dict_mem_index_free(index);
error = DB_INDEX_CORRUPT;
goto func_exit;
} else {
/* We will load the index if
1) srv_load_corrupted is TRUE
2) ignore_err is set with
DICT_ERR_IGNORE_CORRUPT
3) if the index corrupted is a secondary
index */
ut_print_timestamp(stderr);
fputs(" InnoDB: load corrupted index ", stderr);
dict_index_name_print(stderr, NULL, index);
putc('\n', stderr);
}
}
/* We check for unsupported types first, so that the /* We check for unsupported types first, so that the
subsequent checks are relevant for the supported types. */ subsequent checks are relevant for the supported types. */
if (index->type & ~(DICT_CLUSTERED | DICT_UNIQUE)) { if (index->type & ~(DICT_CLUSTERED | DICT_UNIQUE
| DICT_CORRUPT)) {
fprintf(stderr, fprintf(stderr,
"InnoDB: Error: unknown type %lu" "InnoDB: Error: unknown type %lu"
" of index %s of table %s\n", " of index %s of table %s\n",
...@@ -1453,9 +1492,13 @@ dict_load_indexes( ...@@ -1453,9 +1492,13 @@ dict_load_indexes(
/* If caller can tolerate this error, /* If caller can tolerate this error,
we will continue to load the index and we will continue to load the index and
let caller deal with this error. However let caller deal with this error. However
mark the index and table corrupted */ mark the index and table corrupted. We
index->corrupted = TRUE; only need to mark such in the index
table->corrupted = TRUE; dictionary cache for such metadata corruption,
since we would always be able to set it
when loading the dictionary cache */
dict_set_corrupted_index_cache_only(index);
fprintf(stderr, fprintf(stderr,
"InnoDB: Index is corrupt but forcing" "InnoDB: Index is corrupt but forcing"
" load into data dictionary\n"); " load into data dictionary\n");
...@@ -1495,9 +1538,10 @@ dict_load_indexes( ...@@ -1495,9 +1538,10 @@ dict_load_indexes(
index->name, table->name); index->name, table->name);
/* If the force recovery flag is set, and /* If the force recovery flag is set, and
if the failed index is not the primary index, we if the failed index is not the clustered index,
will continue and open other indexes */ we will continue and open other indexes */
if (srv_force_recovery if ((srv_force_recovery
|| srv_load_corrupted)
&& !dict_index_is_clust(index)) { && !dict_index_is_clust(index)) {
error = DB_SUCCESS; error = DB_SUCCESS;
goto next_rec; goto next_rec;
...@@ -1812,6 +1856,30 @@ dict_load_table( ...@@ -1812,6 +1856,30 @@ dict_load_table(
err = dict_load_indexes(table, heap, ignore_err); err = dict_load_indexes(table, heap, ignore_err);
if (err == DB_INDEX_CORRUPT) {
/* Refuse to load the table if the table has a corrupted
cluster index */
if (!srv_load_corrupted) {
fprintf(stderr, "InnoDB: Error: Load table ");
ut_print_name(stderr, NULL, TRUE, table->name);
fprintf(stderr, " failed, the table has corrupted"
" clustered indexes. Turn on"
" 'innodb_force_load_corrupted'"
" to drop it\n");
dict_table_remove_from_cache(table);
table = NULL;
goto func_exit;
} else {
dict_index_t* clust_index;
clust_index = dict_table_get_first_index(table);
if (dict_index_is_corrupted(clust_index)) {
table->corrupted = TRUE;
}
}
}
/* Initialize table foreign_child value. Its value could be /* Initialize table foreign_child value. Its value could be
changed when dict_load_foreigns() is called below */ changed when dict_load_foreigns() is called below */
table->fk_max_recusive_level = 0; table->fk_max_recusive_level = 0;
...@@ -1841,6 +1909,12 @@ dict_load_table( ...@@ -1841,6 +1909,12 @@ dict_load_table(
|| !dict_index_is_clust(index)) { || !dict_index_is_clust(index)) {
dict_table_remove_from_cache(table); dict_table_remove_from_cache(table);
table = NULL; table = NULL;
} else if (dict_index_is_corrupted(index)) {
/* It is possible we force to load a corrupted
clustered index if srv_load_corrupted is set.
Mark the table as corrupted in this case */
table->corrupted = TRUE;
} }
} }
#if 0 #if 0
...@@ -1867,6 +1941,7 @@ dict_load_table( ...@@ -1867,6 +1941,7 @@ dict_load_table(
mutex_exit(&dict_foreign_err_mutex); mutex_exit(&dict_foreign_err_mutex);
} }
#endif /* 0 */ #endif /* 0 */
func_exit:
mem_heap_free(heap); mem_heap_free(heap);
return(table); return(table);
......
...@@ -1043,6 +1043,8 @@ convert_error_code_to_mysql( ...@@ -1043,6 +1043,8 @@ convert_error_code_to_mysql(
#endif /* HA_ERR_TOO_MANY_CONCURRENT_TRXS */ #endif /* HA_ERR_TOO_MANY_CONCURRENT_TRXS */
case DB_UNSUPPORTED: case DB_UNSUPPORTED:
return(HA_ERR_UNSUPPORTED); return(HA_ERR_UNSUPPORTED);
case DB_INDEX_CORRUPT:
return(HA_ERR_INDEX_CORRUPT);
} }
} }
...@@ -2078,6 +2080,27 @@ innobase_convert_name( ...@@ -2078,6 +2080,27 @@ innobase_convert_name(
} }
/*****************************************************************//**
A wrapper function of innobase_convert_name(), convert a table or
index name to the MySQL system_charset_info (UTF-8) and quote it if needed.
The result written to buf is always NUL-terminated and never exceeds
buflen bytes including the terminator. Nothing is written when buflen
is 0. This function returns no value. */
static inline
void
innobase_format_name(
/*==================*/
	char*		buf,	/*!< out: buffer for converted identifier */
	ulint		buflen,	/*!< in: length of buf, in bytes */
	const char*	name,	/*!< in: index or table name to format */
	ibool		is_index_name) /*!< in: index name */
{
	const char*	bufend;

	if (buflen == 0) {
		/* There is no room even for the terminating NUL. */
		return;
	}

	/* Offer only buflen - 1 bytes to the conversion so that the
	terminator stored below stays inside buf even when the conversion
	uses everything it is given.
	NOTE(review): is_index_name is forwarded unchanged as the final
	flag of innobase_convert_name(); confirm against its declaration
	that the flag has the same polarity -- TODO verify. */
	bufend = innobase_convert_name(buf, buflen - 1, name, strlen(name),
				       NULL, is_index_name);

	buf[bufend - buf] = '\0';
}
/**********************************************************************//** /**********************************************************************//**
Determines if the currently running transaction has been interrupted. Determines if the currently running transaction has been interrupted.
@return TRUE if interrupted */ @return TRUE if interrupted */
...@@ -5664,12 +5687,14 @@ ha_innobase::index_read( ...@@ -5664,12 +5687,14 @@ ha_innobase::index_read(
index = prebuilt->index; index = prebuilt->index;
if (UNIV_UNLIKELY(index == NULL)) { if (UNIV_UNLIKELY(index == NULL) || dict_index_is_corrupted(index)) {
prebuilt->index_usable = FALSE; prebuilt->index_usable = FALSE;
DBUG_RETURN(HA_ERR_CRASHED); DBUG_RETURN(HA_ERR_CRASHED);
} }
if (UNIV_UNLIKELY(!prebuilt->index_usable)) { if (UNIV_UNLIKELY(!prebuilt->index_usable)) {
DBUG_RETURN(HA_ERR_TABLE_DEF_CHANGED); DBUG_RETURN(dict_index_is_corrupted(index)
? HA_ERR_INDEX_CORRUPT
: HA_ERR_TABLE_DEF_CHANGED);
} }
/* Note that if the index for which the search template is built is not /* Note that if the index for which the search template is built is not
...@@ -5855,10 +5880,33 @@ ha_innobase::change_active_index( ...@@ -5855,10 +5880,33 @@ ha_innobase::change_active_index(
prebuilt->index); prebuilt->index);
if (UNIV_UNLIKELY(!prebuilt->index_usable)) { if (UNIV_UNLIKELY(!prebuilt->index_usable)) {
push_warning_printf(user_thd, MYSQL_ERROR::WARN_LEVEL_WARN, if (dict_index_is_corrupted(prebuilt->index)) {
char index_name[MAX_FULL_NAME_LEN + 1];
char table_name[MAX_FULL_NAME_LEN + 1];
innobase_format_name(
index_name, sizeof index_name,
prebuilt->index->name, TRUE);
innobase_format_name(
table_name, sizeof table_name,
prebuilt->index->table->name, FALSE);
push_warning_printf(
user_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
HA_ERR_INDEX_CORRUPT,
"InnoDB: Index %s for table %s is"
" marked as corrupted",
index_name, table_name);
DBUG_RETURN(1);
} else {
push_warning_printf(
user_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
HA_ERR_TABLE_DEF_CHANGED, HA_ERR_TABLE_DEF_CHANGED,
"InnoDB: insufficient history for index %u", "InnoDB: insufficient history for index %u",
keynr); keynr);
}
/* The caller seems to ignore this. Thus, we must check /* The caller seems to ignore this. Thus, we must check
this again in row_search_for_mysql(). */ this again in row_search_for_mysql(). */
DBUG_RETURN(2); DBUG_RETURN(2);
...@@ -7518,6 +7566,10 @@ ha_innobase::records_in_range( ...@@ -7518,6 +7566,10 @@ ha_innobase::records_in_range(
n_rows = HA_POS_ERROR; n_rows = HA_POS_ERROR;
goto func_exit; goto func_exit;
} }
if (dict_index_is_corrupted(index)) {
n_rows = HA_ERR_INDEX_CORRUPT;
goto func_exit;
}
if (UNIV_UNLIKELY(!row_merge_is_index_usable(prebuilt->trx, index))) { if (UNIV_UNLIKELY(!row_merge_is_index_usable(prebuilt->trx, index))) {
n_rows = HA_ERR_TABLE_DEF_CHANGED; n_rows = HA_ERR_TABLE_DEF_CHANGED;
goto func_exit; goto func_exit;
...@@ -8184,6 +8236,7 @@ ha_innobase::check( ...@@ -8184,6 +8236,7 @@ ha_innobase::check(
ulint n_rows_in_table = ULINT_UNDEFINED; ulint n_rows_in_table = ULINT_UNDEFINED;
ibool is_ok = TRUE; ibool is_ok = TRUE;
ulint old_isolation_level; ulint old_isolation_level;
ibool table_corrupted;
DBUG_ENTER("ha_innobase::check"); DBUG_ENTER("ha_innobase::check");
DBUG_ASSERT(thd == ha_thd()); DBUG_ASSERT(thd == ha_thd());
...@@ -8225,6 +8278,14 @@ ha_innobase::check( ...@@ -8225,6 +8278,14 @@ ha_innobase::check(
prebuilt->trx->isolation_level = TRX_ISO_REPEATABLE_READ; prebuilt->trx->isolation_level = TRX_ISO_REPEATABLE_READ;
/* Check whether the table is already marked as corrupted
before running the check table */
table_corrupted = prebuilt->table->corrupted;
/* Reset table->corrupted bit so that check table can proceed to
do additional check */
prebuilt->table->corrupted = FALSE;
/* Enlarge the fatal lock wait timeout during CHECK TABLE. */ /* Enlarge the fatal lock wait timeout during CHECK TABLE. */
mutex_enter(&kernel_mutex); mutex_enter(&kernel_mutex);
srv_fatal_semaphore_wait_threshold += 7200; /* 2 hours */ srv_fatal_semaphore_wait_threshold += 7200; /* 2 hours */
...@@ -8233,6 +8294,7 @@ ha_innobase::check( ...@@ -8233,6 +8294,7 @@ ha_innobase::check(
for (index = dict_table_get_first_index(prebuilt->table); for (index = dict_table_get_first_index(prebuilt->table);
index != NULL; index != NULL;
index = dict_table_get_next_index(index)) { index = dict_table_get_next_index(index)) {
char index_name[MAX_FULL_NAME_LEN + 1];
#if 0 #if 0
fputs("Validating index ", stderr); fputs("Validating index ", stderr);
ut_print_name(stderr, trx, FALSE, index->name); ut_print_name(stderr, trx, FALSE, index->name);
...@@ -8241,11 +8303,16 @@ ha_innobase::check( ...@@ -8241,11 +8303,16 @@ ha_innobase::check(
if (!btr_validate_index(index, prebuilt->trx)) { if (!btr_validate_index(index, prebuilt->trx)) {
is_ok = FALSE; is_ok = FALSE;
/* Name the index that was just passed to btr_validate_index(),
i.e. the loop variable, so that the warning below reports the
index whose B-tree failed validation. */
innobase_format_name(
	index_name, sizeof index_name,
	index->name, TRUE);
push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
ER_NOT_KEYFILE, ER_NOT_KEYFILE,
"InnoDB: The B-tree of" "InnoDB: The B-tree of"
" index '%-.200s' is corrupted.", " index %s is corrupted.",
index->name); index_name);
continue; continue;
} }
...@@ -8258,11 +8325,26 @@ ha_innobase::check( ...@@ -8258,11 +8325,26 @@ ha_innobase::check(
prebuilt->trx, prebuilt->index); prebuilt->trx, prebuilt->index);
if (UNIV_UNLIKELY(!prebuilt->index_usable)) { if (UNIV_UNLIKELY(!prebuilt->index_usable)) {
push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, innobase_format_name(
index_name, sizeof index_name,
prebuilt->index->name, TRUE);
if (dict_index_is_corrupted(prebuilt->index)) {
push_warning_printf(
user_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
HA_ERR_INDEX_CORRUPT,
"InnoDB: Index %s is marked as"
" corrupted",
index_name);
is_ok = FALSE;
} else {
push_warning_printf(
thd, MYSQL_ERROR::WARN_LEVEL_WARN,
HA_ERR_TABLE_DEF_CHANGED, HA_ERR_TABLE_DEF_CHANGED,
"InnoDB: Insufficient history for" "InnoDB: Insufficient history for"
" index '%-.200s'", " index %s",
index->name); index_name);
}
continue; continue;
} }
...@@ -8276,12 +8358,19 @@ ha_innobase::check( ...@@ -8276,12 +8358,19 @@ ha_innobase::check(
prebuilt->select_lock_type = LOCK_NONE; prebuilt->select_lock_type = LOCK_NONE;
if (!row_check_index_for_mysql(prebuilt, index, &n_rows)) { if (!row_check_index_for_mysql(prebuilt, index, &n_rows)) {
innobase_format_name(
index_name, sizeof index_name,
index->name, TRUE);
push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
ER_NOT_KEYFILE, ER_NOT_KEYFILE,
"InnoDB: The B-tree of" "InnoDB: The B-tree of"
" index '%-.200s' is corrupted.", " index %s is corrupted.",
index->name); index_name);
is_ok = FALSE; is_ok = FALSE;
row_mysql_lock_data_dictionary(prebuilt->trx);
dict_set_corrupted(index);
row_mysql_unlock_data_dictionary(prebuilt->trx);
} }
if (thd_killed(user_thd)) { if (thd_killed(user_thd)) {
...@@ -8308,6 +8397,20 @@ ha_innobase::check( ...@@ -8308,6 +8397,20 @@ ha_innobase::check(
} }
} }
if (table_corrupted) {
	/* Some previous operation marked the table as corrupted in
	memory (the flag was saved in table_corrupted and cleared
	before the checks ran). If that state has not yet been
	propagated to the clustered index, do so here. */
	index = dict_table_get_first_index(prebuilt->table);

	if (!dict_index_is_corrupted(index)) {
		/* Take the data dictionary lock the same way as when
		an index is flagged corrupted inside the per-index
		loop of this function, instead of acquiring only
		dict_sys->mutex. */
		row_mysql_lock_data_dictionary(prebuilt->trx);
		dict_set_corrupted(index);
		row_mysql_unlock_data_dictionary(prebuilt->trx);
	}

	/* Restore the in-memory flag that was reset before the
	check so that the table stays marked as corrupted. */
	prebuilt->table->corrupted = TRUE;
}
/* Restore the original isolation level */ /* Restore the original isolation level */
prebuilt->trx->isolation_level = old_isolation_level; prebuilt->trx->isolation_level = old_isolation_level;
...@@ -11101,6 +11204,11 @@ static MYSQL_SYSVAR_BOOL(large_prefix, innobase_large_prefix, ...@@ -11101,6 +11204,11 @@ static MYSQL_SYSVAR_BOOL(large_prefix, innobase_large_prefix,
"Support large index prefix length of REC_VERSION_56_MAX_INDEX_COL_LEN (3072) bytes.", "Support large index prefix length of REC_VERSION_56_MAX_INDEX_COL_LEN (3072) bytes.",
NULL, NULL, FALSE); NULL, NULL, FALSE);
/* Boolean system variable "force_load_corrupted", backed by the global
srv_load_corrupted. It is flagged PLUGIN_VAR_READONLY, so it is not
meant to be changed at runtime; no check/update callbacks are installed
(both NULL). NOTE(review): the trailing FALSE is presumably the default
value -- confirm against the MYSQL_SYSVAR_BOOL macro definition. */
static MYSQL_SYSVAR_BOOL(force_load_corrupted, srv_load_corrupted,
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
"Force InnoDB to load metadata of corrupted table.",
NULL, NULL, FALSE);
static MYSQL_SYSVAR_BOOL(locks_unsafe_for_binlog, innobase_locks_unsafe_for_binlog, static MYSQL_SYSVAR_BOOL(locks_unsafe_for_binlog, innobase_locks_unsafe_for_binlog,
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
"Force InnoDB to not use next-key locking, to use only row-level locking.", "Force InnoDB to not use next-key locking, to use only row-level locking.",
...@@ -11360,6 +11468,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { ...@@ -11360,6 +11468,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(flush_method), MYSQL_SYSVAR(flush_method),
MYSQL_SYSVAR(force_recovery), MYSQL_SYSVAR(force_recovery),
MYSQL_SYSVAR(large_prefix), MYSQL_SYSVAR(large_prefix),
MYSQL_SYSVAR(force_load_corrupted),
MYSQL_SYSVAR(locks_unsafe_for_binlog), MYSQL_SYSVAR(locks_unsafe_for_binlog),
MYSQL_SYSVAR(lock_wait_timeout), MYSQL_SYSVAR(lock_wait_timeout),
#ifdef UNIV_LOG_ARCHIVE #ifdef UNIV_LOG_ARCHIVE
......
...@@ -203,6 +203,17 @@ void ...@@ -203,6 +203,17 @@ void
buf_LRU_stat_update(void); buf_LRU_stat_update(void);
/*=====================*/ /*=====================*/
/******************************************************************//**
Remove one page from LRU list and put it to free list */
UNIV_INTERN
void
buf_LRU_free_one_page(
/*==================*/
buf_page_t* bpage) /*!< in/out: block, must contain a file page and
be in a state where it can be freed; there
may or may not be a hash index to the page */
__attribute__((nonnull));
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/**********************************************************************//** /**********************************************************************//**
Validates the LRU list. Validates the LRU list.
......
...@@ -110,6 +110,7 @@ enum db_err { ...@@ -110,6 +110,7 @@ enum db_err {
foreign keys as its prefix columns */ foreign keys as its prefix columns */
DB_TOO_BIG_INDEX_COL, /* index column size exceeds maximum DB_TOO_BIG_INDEX_COL, /* index column size exceeds maximum
limit */ limit */
DB_INDEX_CORRUPT, /* we have corrupted index */
/* The following are partial failure codes */ /* The following are partial failure codes */
DB_FAIL = 1000, DB_FAIL = 1000,
......
...@@ -137,8 +137,10 @@ dict_create(void); ...@@ -137,8 +137,10 @@ dict_create(void);
header is created */ header is created */
/*-------------------------------------------------------------*/ /*-------------------------------------------------------------*/
/* The field number of the page number field in the sys_indexes table /* The field numbers in the SYS_TABLES clustered index */
clustered index */ #define DICT_SYS_TABLES_TYPE_FIELD 5
/* The field numbers in the SYS_INDEXES clustered index */
#define DICT_SYS_INDEXES_PAGE_NO_FIELD 8 #define DICT_SYS_INDEXES_PAGE_NO_FIELD 8
#define DICT_SYS_INDEXES_SPACE_NO_FIELD 7 #define DICT_SYS_INDEXES_SPACE_NO_FIELD 7
#define DICT_SYS_INDEXES_TYPE_FIELD 6 #define DICT_SYS_INDEXES_TYPE_FIELD 6
......
...@@ -585,6 +585,20 @@ dict_table_get_next_index( ...@@ -585,6 +585,20 @@ dict_table_get_next_index(
# define dict_table_get_next_index(index) UT_LIST_GET_NEXT(indexes, index) # define dict_table_get_next_index(index) UT_LIST_GET_NEXT(indexes, index)
#endif /* UNIV_DEBUG */ #endif /* UNIV_DEBUG */
#endif /* !UNIV_HOTBACKUP */ #endif /* !UNIV_HOTBACKUP */
/* Skip corrupted indexes: advance `index` (which must be a modifiable
pointer expression) past every index for which
dict_index_is_corrupted() holds. `index` ends up either on the first
non-corrupted index or NULL when the list is exhausted.
The body is wrapped in do { } while (0) so that the macro expands to
exactly one statement and can be followed by ';' safely, also in an
unbraced if/else branch. */
#define dict_table_skip_corrupt_index(index)				\
	do {								\
		while ((index) && dict_index_is_corrupted(index)) {	\
			(index) = dict_table_get_next_index(index);	\
		}							\
	} while (0)

/* Get the next non-corrupt index: step `index` to its successor and
then skip over any corrupted indexes that follow. `index` must not be
NULL on entry; it may be NULL afterwards. */
#define dict_table_next_uncorrupted_index(index)			\
	do {								\
		(index) = dict_table_get_next_index(index);		\
		dict_table_skip_corrupt_index(index);			\
	} while (0)
/********************************************************************//** /********************************************************************//**
Check whether the index is the clustered index. Check whether the index is the clustered index.
@return nonzero for clustered index, zero for other indexes */ @return nonzero for clustered index, zero for other indexes */
...@@ -593,7 +607,7 @@ ulint ...@@ -593,7 +607,7 @@ ulint
dict_index_is_clust( dict_index_is_clust(
/*================*/ /*================*/
const dict_index_t* index) /*!< in: index */ const dict_index_t* index) /*!< in: index */
__attribute__((pure)); __attribute__((nonnull, pure, warn_unused_result));
/********************************************************************//** /********************************************************************//**
Check whether the index is unique. Check whether the index is unique.
@return nonzero for unique index, zero for other indexes */ @return nonzero for unique index, zero for other indexes */
...@@ -602,7 +616,7 @@ ulint ...@@ -602,7 +616,7 @@ ulint
dict_index_is_unique( dict_index_is_unique(
/*=================*/ /*=================*/
const dict_index_t* index) /*!< in: index */ const dict_index_t* index) /*!< in: index */
__attribute__((pure)); __attribute__((nonnull, pure, warn_unused_result));
/********************************************************************//** /********************************************************************//**
Check whether the index is the insert buffer tree. Check whether the index is the insert buffer tree.
@return nonzero for insert buffer, zero for other indexes */ @return nonzero for insert buffer, zero for other indexes */
...@@ -611,7 +625,7 @@ ulint ...@@ -611,7 +625,7 @@ ulint
dict_index_is_ibuf( dict_index_is_ibuf(
/*===============*/ /*===============*/
const dict_index_t* index) /*!< in: index */ const dict_index_t* index) /*!< in: index */
__attribute__((pure)); __attribute__((nonnull, pure, warn_unused_result));
/********************************************************************//** /********************************************************************//**
Check whether the index is a secondary index or the insert buffer tree. Check whether the index is a secondary index or the insert buffer tree.
@return nonzero for insert buffer, zero for other indexes */ @return nonzero for insert buffer, zero for other indexes */
...@@ -620,7 +634,7 @@ ulint ...@@ -620,7 +634,7 @@ ulint
dict_index_is_sec_or_ibuf( dict_index_is_sec_or_ibuf(
/*======================*/ /*======================*/
const dict_index_t* index) /*!< in: index */ const dict_index_t* index) /*!< in: index */
__attribute__((pure)); __attribute__((nonnull, pure, warn_unused_result));
/********************************************************************//** /********************************************************************//**
Gets the number of user-defined columns in a table in the dictionary Gets the number of user-defined columns in a table in the dictionary
...@@ -630,7 +644,8 @@ UNIV_INLINE ...@@ -630,7 +644,8 @@ UNIV_INLINE
ulint ulint
dict_table_get_n_user_cols( dict_table_get_n_user_cols(
/*=======================*/ /*=======================*/
const dict_table_t* table); /*!< in: table */ const dict_table_t* table) /*!< in: table */
__attribute__((nonnull, pure, warn_unused_result));
/********************************************************************//** /********************************************************************//**
Gets the number of system columns in a table in the dictionary cache. Gets the number of system columns in a table in the dictionary cache.
@return number of system (e.g., ROW_ID) columns of a table */ @return number of system (e.g., ROW_ID) columns of a table */
...@@ -638,7 +653,8 @@ UNIV_INLINE ...@@ -638,7 +653,8 @@ UNIV_INLINE
ulint ulint
dict_table_get_n_sys_cols( dict_table_get_n_sys_cols(
/*======================*/ /*======================*/
const dict_table_t* table); /*!< in: table */ const dict_table_t* table) /*!< in: table */
__attribute__((nonnull, pure, warn_unused_result));
/********************************************************************//** /********************************************************************//**
Gets the number of all columns (also system) in a table in the dictionary Gets the number of all columns (also system) in a table in the dictionary
cache. cache.
...@@ -647,7 +663,8 @@ UNIV_INLINE ...@@ -647,7 +663,8 @@ UNIV_INLINE
ulint ulint
dict_table_get_n_cols( dict_table_get_n_cols(
/*==================*/ /*==================*/
const dict_table_t* table); /*!< in: table */ const dict_table_t* table) /*!< in: table */
__attribute__((nonnull, pure, warn_unused_result));
#ifdef UNIV_DEBUG #ifdef UNIV_DEBUG
/********************************************************************//** /********************************************************************//**
Gets the nth column of a table. Gets the nth column of a table.
...@@ -1243,6 +1260,56 @@ void ...@@ -1243,6 +1260,56 @@ void
dict_close(void); dict_close(void);
/*============*/ /*============*/
/**********************************************************************//**
Check whether the table is corrupted.
@return nonzero for corrupted table, zero for valid tables */
UNIV_INLINE
ulint
dict_table_is_corrupted(
/*====================*/
const dict_table_t* table) /*!< in: table */
__attribute__((nonnull, pure, warn_unused_result));
/**********************************************************************//**
Check whether the index is corrupted.
@return nonzero for corrupted index, zero for valid indexes */
UNIV_INLINE
ulint
dict_index_is_corrupted(
/*====================*/
const dict_index_t* index) /*!< in: index */
__attribute__((nonnull, pure, warn_unused_result));
/**********************************************************************//**
Flags an index and table corrupted both in the data dictionary cache
and in the system table SYS_INDEXES. */
UNIV_INTERN
void
dict_set_corrupted(
/*===============*/
dict_index_t* index) /*!< in/out: index */
UNIV_COLD __attribute__((nonnull));
/**********************************************************************//**
Flags an index as corrupted in the data dictionary cache only. This
is used mostly to mark an index as corrupted when the index's own
dictionary entry is corrupted and we force-load the index for repair. */
UNIV_INTERN
void
dict_set_corrupted_index_cache_only(
/*================================*/
dict_index_t* index); /*!< in/out: index */
/**********************************************************************//**
Flags a table with specified space_id corrupted in the table dictionary
cache.
@return TRUE if successful */
UNIV_INTERN
ibool
dict_set_corrupted_by_space(
/*========================*/
ulint space_id); /*!< in: space ID */
#ifndef UNIV_NONINL #ifndef UNIV_NONINL
#include "dict0dict.ic" #include "dict0dict.ic"
#endif #endif
......
...@@ -27,6 +27,7 @@ Created 1/8/1996 Heikki Tuuri ...@@ -27,6 +27,7 @@ Created 1/8/1996 Heikki Tuuri
#ifndef UNIV_HOTBACKUP #ifndef UNIV_HOTBACKUP
#include "dict0load.h" #include "dict0load.h"
#include "rem0types.h" #include "rem0types.h"
#include "srv0srv.h"
/*********************************************************************//** /*********************************************************************//**
Gets the minimum number of bytes per character. Gets the minimum number of bytes per character.
...@@ -872,6 +873,18 @@ dict_table_get_low( ...@@ -872,6 +873,18 @@ dict_table_get_low(
table = dict_table_check_if_in_cache_low(table_name); table = dict_table_check_if_in_cache_low(table_name);
if (table && table->corrupted) {
	/* The cached table object is flagged as corrupted: report it
	on stderr. The prefix ends with a space so that the word
	"table" is separated from the name printed next. */
	fputs("InnoDB: table ", stderr);
	ut_print_name(stderr, NULL, TRUE, table->name);
	if (srv_load_corrupted) {
		/* innodb_force_load_corrupted is set: fall through
		and hand out the table despite the corruption. */
		fputs(" is corrupted, but"
		      " innodb_force_load_corrupted is set\n", stderr);
	} else {
		/* Refuse to return a corrupted table to the caller. */
		fputs(" is corrupted\n", stderr);
		return(NULL);
	}
}
if (table == NULL) { if (table == NULL) {
table = dict_load_table(table_name, TRUE, DICT_ERR_IGNORE_NONE); table = dict_load_table(table_name, TRUE, DICT_ERR_IGNORE_NONE);
} }
...@@ -937,4 +950,35 @@ dict_max_field_len_store_undo( ...@@ -937,4 +950,35 @@ dict_max_field_len_store_undo(
return(prefix_len); return(prefix_len);
} }
/********************************************************************//**
Tells whether the given table object carries the "corrupted" flag.
@return nonzero if the table is flagged corrupted, zero otherwise */
UNIV_INLINE
ulint
dict_table_is_corrupted(
/*====================*/
	const dict_table_t*	table)	/*!< in: table */
{
	ulint	corrupted_flag;

	/* Debug-only sanity checks on the table object */
	ut_ad(table);
	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);

	corrupted_flag = table->corrupted;

	/* Corruption is the rare case; hint the compiler accordingly */
	return(UNIV_UNLIKELY(corrupted_flag));
}
/********************************************************************//**
Tells whether an index must be treated as corrupted: either the index
has the DICT_CORRUPT bit set in its type, or the table it belongs to
is flagged as corrupted.
@return nonzero if the index is corrupted, zero if it is valid */
UNIV_INLINE
ulint
dict_index_is_corrupted(
/*====================*/
	const dict_index_t*	index)	/*!< in: index */
{
	ulint	is_corrupt;

	/* Debug-only sanity checks on the index object */
	ut_ad(index);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);

	if (index->type & DICT_CORRUPT) {
		/* The index itself carries the corrupted bit */
		is_corrupt = 1;
	} else if (index->table) {
		/* Otherwise inherit the state of the owning table */
		is_corrupt = index->table->corrupted ? 1 : 0;
	} else {
		is_corrupt = 0;
	}

	/* Corruption is the rare case; hint the compiler accordingly */
	return(UNIV_UNLIKELY(is_corrupt));
}
#endif /* !UNIV_HOTBACKUP */ #endif /* !UNIV_HOTBACKUP */
...@@ -52,6 +52,11 @@ combination of types */ ...@@ -52,6 +52,11 @@ combination of types */
#define DICT_UNIVERSAL 4 /*!< index which can contain records from any #define DICT_UNIVERSAL 4 /*!< index which can contain records from any
other index */ other index */
#define DICT_IBUF 8 /*!< insert buffer tree */ #define DICT_IBUF 8 /*!< insert buffer tree */
#define DICT_CORRUPT 16 /*!< bit to store the corrupted flag
in SYS_INDEXES.TYPE */
#define DICT_IT_BITS 5 /*!< number of bits used for
SYS_INDEXES.TYPE */
/* @} */ /* @} */
/** Types for a table object */ /** Types for a table object */
...@@ -369,8 +374,9 @@ struct dict_index_struct{ ...@@ -369,8 +374,9 @@ struct dict_index_struct{
/*!< space where the index tree is placed */ /*!< space where the index tree is placed */
unsigned page:32;/*!< index tree root page number */ unsigned page:32;/*!< index tree root page number */
#endif /* !UNIV_HOTBACKUP */ #endif /* !UNIV_HOTBACKUP */
unsigned type:4; /*!< index type (DICT_CLUSTERED, DICT_UNIQUE, unsigned type:DICT_IT_BITS;
DICT_UNIVERSAL, DICT_IBUF) */ /*!< index type (DICT_CLUSTERED, DICT_UNIQUE,
DICT_UNIVERSAL, DICT_IBUF, DICT_CORRUPT) */
unsigned trx_id_offset:10;/*!< position of the trx id column unsigned trx_id_offset:10;/*!< position of the trx id column
in a clustered index record, if the fields in a clustered index record, if the fields
before it are known to be of a fixed size, before it are known to be of a fixed size,
...@@ -391,8 +397,6 @@ struct dict_index_struct{ ...@@ -391,8 +397,6 @@ struct dict_index_struct{
/*!< TRUE if this index is marked to be /*!< TRUE if this index is marked to be
dropped in ha_innobase::prepare_drop_index(), dropped in ha_innobase::prepare_drop_index(),
otherwise FALSE */ otherwise FALSE */
unsigned corrupted:1;
/*!< TRUE if the index object is corrupted */
dict_field_t* fields; /*!< array of field descriptions */ dict_field_t* fields; /*!< array of field descriptions */
#ifndef UNIV_HOTBACKUP #ifndef UNIV_HOTBACKUP
UT_LIST_NODE_T(dict_index_t) UT_LIST_NODE_T(dict_index_t)
......
...@@ -51,7 +51,8 @@ be or-ed together */ ...@@ -51,7 +51,8 @@ be or-ed together */
enum dict_err_ignore { enum dict_err_ignore {
DICT_ERR_IGNORE_NONE = 0, /*!< no error to ignore */ DICT_ERR_IGNORE_NONE = 0, /*!< no error to ignore */
DICT_ERR_IGNORE_INDEX_ROOT = 1, /*!< ignore error if index root DICT_ERR_IGNORE_INDEX_ROOT = 1, /*!< ignore error if index root
page is FIL_NUL or incorrect value */ page is FIL_NULL or incorrect value */
DICT_ERR_IGNORE_CORRUPT = 2, /*!< skip corrupted indexes */
DICT_ERR_IGNORE_ALL = 0xFFFF /*!< ignore all errors */ DICT_ERR_IGNORE_ALL = 0xFFFF /*!< ignore all errors */
}; };
......
...@@ -141,6 +141,10 @@ extern ulint srv_log_buffer_size; ...@@ -141,6 +141,10 @@ extern ulint srv_log_buffer_size;
extern ulong srv_flush_log_at_trx_commit; extern ulong srv_flush_log_at_trx_commit;
extern char srv_adaptive_flushing; extern char srv_adaptive_flushing;
/* If this flag is TRUE, then we will load the metadata of indexes (and
tables) even if they are marked as "corrupted". This is mostly meant to
let a DBA work on corrupted indexes and tables */
extern my_bool srv_load_corrupted;
/* The sort order table of the MySQL latin1_swedish_ci character set /* The sort order table of the MySQL latin1_swedish_ci character set
collation */ collation */
......
...@@ -568,7 +568,7 @@ opt_search_plan_for_table( ...@@ -568,7 +568,7 @@ opt_search_plan_for_table(
best_last_op = last_op; best_last_op = last_op;
} }
index = dict_table_get_next_index(index); dict_table_next_uncorrupted_index(index);
} }
plan->index = best_index; plan->index = best_index;
......
...@@ -118,6 +118,9 @@ ins_node_create_entry_list( ...@@ -118,6 +118,9 @@ ins_node_create_entry_list(
node->entry_sys_heap); node->entry_sys_heap);
UT_LIST_ADD_LAST(tuple_list, node->entry_list, entry); UT_LIST_ADD_LAST(tuple_list, node->entry_list, entry);
/* We will include all indexes (including corrupted
secondary indexes) in the entry list. Filtering of
these corrupted indexes will be done in row_ins() */
index = dict_table_get_next_index(index); index = dict_table_get_next_index(index);
} }
} }
...@@ -2046,7 +2049,6 @@ row_ins_index_entry_low( ...@@ -2046,7 +2049,6 @@ row_ins_index_entry_low(
mtr_start(&mtr); mtr_start(&mtr);
if (err != DB_SUCCESS) { if (err != DB_SUCCESS) {
goto function_exit; goto function_exit;
} }
...@@ -2431,6 +2433,13 @@ row_ins( ...@@ -2431,6 +2433,13 @@ row_ins(
node->index = dict_table_get_next_index(node->index); node->index = dict_table_get_next_index(node->index);
node->entry = UT_LIST_GET_NEXT(tuple_list, node->entry); node->entry = UT_LIST_GET_NEXT(tuple_list, node->entry);
/* Skip corrupted secondary indexes and their entries */
while (node->index && dict_index_is_corrupted(node->index)) {
node->index = dict_table_get_next_index(node->index);
node->entry = UT_LIST_GET_NEXT(tuple_list, node->entry);
}
} }
ut_ad(node->entry == NULL); ut_ad(node->entry == NULL);
......
...@@ -2554,8 +2554,9 @@ row_merge_is_index_usable( ...@@ -2554,8 +2554,9 @@ row_merge_is_index_usable(
const trx_t* trx, /*!< in: transaction */ const trx_t* trx, /*!< in: transaction */
const dict_index_t* index) /*!< in: index to check */ const dict_index_t* index) /*!< in: index to check */
{ {
return(!trx->read_view return(!dict_index_is_corrupted(index)
|| read_view_sees_trx_id(trx->read_view, index->trx_id)); && (!trx->read_view
|| read_view_sees_trx_id(trx->read_view, index->trx_id)));
} }
/*********************************************************************//** /*********************************************************************//**
......
...@@ -3098,7 +3098,8 @@ row_drop_table_for_mysql( ...@@ -3098,7 +3098,8 @@ row_drop_table_for_mysql(
ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */ #endif /* UNIV_SYNC_DEBUG */
table = dict_table_get_low_ignore_err(name, DICT_ERR_IGNORE_INDEX_ROOT); table = dict_table_get_low_ignore_err(
name, DICT_ERR_IGNORE_INDEX_ROOT | DICT_ERR_IGNORE_CORRUPT);
if (!table) { if (!table) {
err = DB_TABLE_NOT_FOUND; err = DB_TABLE_NOT_FOUND;
......
...@@ -469,6 +469,13 @@ row_purge_del_mark( ...@@ -469,6 +469,13 @@ row_purge_del_mark(
heap = mem_heap_create(1024); heap = mem_heap_create(1024);
while (node->index != NULL) { while (node->index != NULL) {
/* skip corrupted secondary index */
dict_table_skip_corrupt_index(node->index);
if (!node->index) {
break;
}
index = node->index; index = node->index;
/* Build the index entry */ /* Build the index entry */
...@@ -516,6 +523,12 @@ row_purge_upd_exist_or_extern_func( ...@@ -516,6 +523,12 @@ row_purge_upd_exist_or_extern_func(
heap = mem_heap_create(1024); heap = mem_heap_create(1024);
while (node->index != NULL) { while (node->index != NULL) {
dict_table_skip_corrupt_index(node->index);
if (!node->index) {
break;
}
index = node->index; index = node->index;
if (row_upd_changes_ord_field_binary(node->index, node->update, if (row_upd_changes_ord_field_binary(node->index, node->update,
......
...@@ -3441,6 +3441,13 @@ row_search_for_mysql( ...@@ -3441,6 +3441,13 @@ row_search_for_mysql(
return(DB_MISSING_HISTORY); return(DB_MISSING_HISTORY);
} }
if (dict_index_is_corrupted(index)) {
#ifdef UNIV_SYNC_DEBUG
ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
#endif /* UNIV_SYNC_DEBUG */
return(DB_CORRUPTION);
}
if (UNIV_UNLIKELY(prebuilt->magic_n != ROW_PREBUILT_ALLOCATED)) { if (UNIV_UNLIKELY(prebuilt->magic_n != ROW_PREBUILT_ALLOCATED)) {
fprintf(stderr, fprintf(stderr,
"InnoDB: Error: trying to free a corrupt\n" "InnoDB: Error: trying to free a corrupt\n"
......
...@@ -328,6 +328,8 @@ row_undo_ins( ...@@ -328,6 +328,8 @@ row_undo_ins(
node->index = dict_table_get_next_index( node->index = dict_table_get_next_index(
dict_table_get_first_index(node->table)); dict_table_get_first_index(node->table));
dict_table_skip_corrupt_index(node->index);
while (node->index != NULL) { while (node->index != NULL) {
dtuple_t* entry; dtuple_t* entry;
ulint err; ulint err;
...@@ -355,7 +357,7 @@ row_undo_ins( ...@@ -355,7 +357,7 @@ row_undo_ins(
} }
} }
node->index = dict_table_get_next_index(node->index); dict_table_next_uncorrupted_index(node->index);
} }
log_free_check(); log_free_check();
......
...@@ -573,6 +573,14 @@ row_undo_mod_upd_del_sec( ...@@ -573,6 +573,14 @@ row_undo_mod_upd_del_sec(
heap = mem_heap_create(1024); heap = mem_heap_create(1024);
while (node->index != NULL) { while (node->index != NULL) {
/* Skip all corrupted secondary indexes */
dict_table_skip_corrupt_index(node->index);
if (!node->index) {
break;
}
index = node->index; index = node->index;
entry = row_build_index_entry(node->row, node->ext, entry = row_build_index_entry(node->row, node->ext,
...@@ -626,6 +634,13 @@ row_undo_mod_del_mark_sec( ...@@ -626,6 +634,13 @@ row_undo_mod_del_mark_sec(
heap = mem_heap_create(1024); heap = mem_heap_create(1024);
while (node->index != NULL) { while (node->index != NULL) {
/* Skip all corrupted secondary indexes */
dict_table_skip_corrupt_index(node->index);
if (!node->index) {
break;
}
index = node->index; index = node->index;
entry = row_build_index_entry(node->row, node->ext, entry = row_build_index_entry(node->row, node->ext,
...@@ -677,6 +692,13 @@ row_undo_mod_upd_exist_sec( ...@@ -677,6 +692,13 @@ row_undo_mod_upd_exist_sec(
heap = mem_heap_create(1024); heap = mem_heap_create(1024);
while (node->index != NULL) { while (node->index != NULL) {
/* Skip all corrupted secondary indexes */
dict_table_skip_corrupt_index(node->index);
if (!node->index) {
break;
}
index = node->index; index = node->index;
if (row_upd_changes_ord_field_binary(node->index, node->update, if (row_upd_changes_ord_field_binary(node->index, node->update,
...@@ -859,6 +881,9 @@ row_undo_mod( ...@@ -859,6 +881,9 @@ row_undo_mod(
node->index = dict_table_get_next_index( node->index = dict_table_get_next_index(
dict_table_get_first_index(node->table)); dict_table_get_first_index(node->table));
/* Skip all corrupted secondary indexes */
dict_table_skip_corrupt_index(node->index);
if (node->rec_type == TRX_UNDO_UPD_EXIST_REC) { if (node->rec_type == TRX_UNDO_UPD_EXIST_REC) {
err = row_undo_mod_upd_exist_sec(node, thr); err = row_undo_mod_upd_exist_sec(node, thr);
......
...@@ -2320,6 +2320,13 @@ row_upd( ...@@ -2320,6 +2320,13 @@ row_upd(
while (node->index != NULL) { while (node->index != NULL) {
/* Skip corrupted index */
dict_table_skip_corrupt_index(node->index);
if (!node->index) {
break;
}
log_free_check(); log_free_check();
err = row_upd_sec_step(node, thr); err = row_upd_sec_step(node, thr);
......
...@@ -712,6 +712,8 @@ ut_strerr( ...@@ -712,6 +712,8 @@ ut_strerr(
return("No index on referencing keys in referencing table"); return("No index on referencing keys in referencing table");
case DB_PARENT_NO_INDEX: case DB_PARENT_NO_INDEX:
return("No index on referenced keys in referenced table"); return("No index on referenced keys in referenced table");
case DB_INDEX_CORRUPT:
return("Index corrupted");
case DB_END_OF_INDEX: case DB_END_OF_INDEX:
return("End of index"); return("End of index");
/* do not add default: in order to produce a warning if new code /* do not add default: in order to produce a warning if new code
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment