Commit c0439b17 authored by Marko Mäkelä

Merge 10.7 into 10.8

parents 5b1eb87b 22ab79c4
call mtr.add_suppression("mariadbd.*: File .*");
call mtr.add_suppression("Plugin 'file_key_management' .*");
call mtr.add_suppression("InnoDB: We do not continue the crash recovery");
call mtr.add_suppression("mariadbd.*: File ");
call mtr.add_suppression("Plugin 'file_key_management' ");
call mtr.add_suppression("InnoDB: Recovery cannot access file");
call mtr.add_suppression("InnoDB: Plugin initialization aborted");
call mtr.add_suppression("Plugin 'InnoDB' init function returned error\\.");
call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed.");
......
......@@ -3,9 +3,9 @@
# embedded does not support restart
-- source include/not_embedded.inc
call mtr.add_suppression("mariadbd.*: File .*");
call mtr.add_suppression("Plugin 'file_key_management' .*");
call mtr.add_suppression("InnoDB: We do not continue the crash recovery");
call mtr.add_suppression("mariadbd.*: File ");
call mtr.add_suppression("Plugin 'file_key_management' ");
call mtr.add_suppression("InnoDB: Recovery cannot access file");
call mtr.add_suppression("InnoDB: Plugin initialization aborted");
call mtr.add_suppression("Plugin 'InnoDB' init function returned error\\.");
call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed.");
......
......@@ -12,7 +12,7 @@ check table t1;
Table Op Msg_type Msg_text
test.t1 check Warning InnoDB: Index 'b' contains 990 entries, should be 1024.
test.t1 check error Corrupt
# restart
# restart: --innodb-force_recovery=0
SET GLOBAL innodb_fast_shutdown=0;
# restart
# restart: --innodb-force_recovery=0
DROP TABLE t1;
......@@ -11,6 +11,8 @@ call mtr.add_suppression("InnoDB: Failed to find tablespace for table `test`\\.`
call mtr.add_suppression("InnoDB: Allocated tablespace ID \\d+ for test.t1, old maximum was");
call mtr.add_suppression("InnoDB: Failed to find tablespace for table `mysql`\\.`transaction_registry` in the cache\\. Attempting to load the tablespace with space id");
call mtr.add_suppression("InnoDB: Allocated tablespace ID \\d+ for mysql.transaction_registry, old maximum was");
call mtr.add_suppression("InnoDB: Trying to read 4096 bytes");
call mtr.add_suppression("InnoDB: File './test/t1.ibd' is corrupted");
--enable_query_log
CREATE TABLE t1(
......@@ -106,7 +108,7 @@ truncate(FILE, $ps * $pages);
close(FILE) || die "Unable to close $file";
EOF
--let $restart_parameters=
--let $restart_parameters=--innodb-force_recovery=0
--source include/start_mysqld.inc
SET GLOBAL innodb_fast_shutdown=0;
--source include/restart_mysqld.inc
......
......@@ -1110,7 +1110,6 @@ void setup_connection_thread_globals(THD *thd)
{
DBUG_EXECUTE_IF("CONNECT_wait", {
extern Dynamic_array<MYSQL_SOCKET> listen_sockets;
DBUG_ASSERT(listen_sockets.size());
while (listen_sockets.size())
my_sleep(1000);
});
......
......@@ -2687,7 +2687,7 @@ buf_page_get_low(
re_evict:
if (mode != BUF_GET_IF_IN_POOL
&& mode != BUF_GET_IF_IN_POOL_OR_WATCH) {
} else if (!ibuf_debug) {
} else if (!ibuf_debug || recv_recovery_is_on()) {
} else if (fil_space_t* space = fil_space_t::get(page_id.space())) {
/* Try to evict the block from the buffer pool, to use the
insert buffer (change buffer) as much as possible. */
......
......@@ -2262,16 +2262,74 @@ static MY_ATTRIBUTE((warn_unused_result, nonnull))
bool ibuf_delete_rec(const page_id_t page_id, btr_pcur_t* pcur,
const dtuple_t* search_tuple, mtr_t* mtr);
/** Delete the change buffer records for the given page id.
Returns immediately if ibuf.index is not set or if srv_read_only_mode holds.
Otherwise positions a cursor in ibuf.index at the first record that is
greater than or equal to the search tuple built from page_id, and deletes
records one at a time for as long as their space id and page number match
page_id. The search is restarted from the beginning whenever the
mini-transaction was committed in the middle of the scan.
@param page_id page identifier */
static void ibuf_delete_recs(const page_id_t page_id)
{
/* Nothing to do without a change buffer index, or in read-only mode. */
if (!ibuf.index || srv_read_only_mode)
return;
/* The search tuple and its fields are plain local objects; this function
creates no memory heap for them. */
dfield_t dfield[IBUF_REC_FIELD_METADATA];
dtuple_t tuple {0,IBUF_REC_FIELD_METADATA,IBUF_REC_FIELD_METADATA,
dfield,0,nullptr
#ifdef UNIV_DEBUG
/* The magic number member is only initialized in UNIV_DEBUG builds. */
,DATA_TUPLE_MAGIC_N
#endif
};
/* Search key: field 0 = 4-byte space id, field 1 = one byte taken from
field_ref_zero (presumably a zero marker byte; confirm against the
change buffer record format), field 2 = 4-byte page number. */
byte space_id[4], page_no[4];
mach_write_to_4(space_id, page_id.space());
mach_write_to_4(page_no, page_id.page_no());
dfield_set_data(&dfield[0], space_id, 4);
dfield_set_data(&dfield[1], field_ref_zero, 1);
dfield_set_data(&dfield[2], page_no, 4);
dtuple_set_types_binary(&tuple, IBUF_REC_FIELD_METADATA);
mtr_t mtr;
/* Restart point: every pass declares a new cursor, starts the
mini-transaction again and repeats the search for the tuple. */
loop:
btr_pcur_t pcur;
pcur.btr_cur.page_cur.index= ibuf.index;
ibuf_mtr_start(&mtr);
/* A nonzero return value from btr_pcur_open() ends the scan. */
if (btr_pcur_open(&tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF, &pcur, 0, &mtr))
goto func_exit;
/* The cursor is not on a user record: there is nothing to delete. The
debug assertion expects it to be positioned after the last record of
the page in that case. */
if (!btr_pcur_is_on_user_rec(&pcur))
{
ut_ad(btr_pcur_is_after_last_on_page(&pcur));
goto func_exit;
}
for (;;)
{
ut_ad(btr_pcur_is_on_user_rec(&pcur));
const rec_t* ibuf_rec = btr_pcur_get_rec(&pcur);
/* Stop at the first record that belongs to a different page. */
if (ibuf_rec_get_space(&mtr, ibuf_rec) != page_id.space()
|| ibuf_rec_get_page_no(&mtr, ibuf_rec) != page_id.page_no())
break;
/* Delete the record from the change buffer */
if (ibuf_delete_rec(page_id, &pcur, &tuple, &mtr))
{
/* Deletion was pessimistic and mtr was committed:
we start from the beginning again.
NOTE(review): pcur is not closed on this path; presumably
ibuf_delete_rec() has already closed it - confirm. */
ut_ad(mtr.has_committed());
goto loop;
}
/* The cursor is now past the last record of the page: commit, close the
cursor and search again from the tuple. */
if (btr_pcur_is_after_last_on_page(&pcur))
{
ibuf_mtr_commit(&mtr);
btr_pcur_close(&pcur);
goto loop;
}
}
/* Common exit: commit the mini-transaction that is still active and
close the cursor. */
func_exit:
ibuf_mtr_commit(&mtr);
btr_pcur_close(&pcur);
}
/** Merge the change buffer to some pages. */
static void ibuf_read_merge_pages(const uint32_t* space_ids,
const uint32_t* page_nos, ulint n_stored)
{
#ifndef DBUG_OFF
mem_heap_t* heap = mem_heap_create(512);
ulint dops[IBUF_OP_COUNT];
memset(dops, 0, sizeof(dops));
#endif
for (ulint i = 0; i < n_stored; i++) {
const uint32_t space_id = space_ids[i];
fil_space_t* s = fil_space_t::get(space_id);
......@@ -2294,28 +2352,36 @@ static void ibuf_read_merge_pages(const uint32_t* space_ids,
if (UNIV_LIKELY(page_nos[i] < size)) {
mtr.start();
dberr_t err;
buf_block_t *b =
buf_block_t *block =
buf_page_get_gen(page_id_t(space_id, page_nos[i]),
zip_size, RW_X_LATCH, nullptr,
BUF_GET_POSSIBLY_FREED,
&mtr, &err, true);
bool remove = !block
|| fil_page_get_type(block->page.frame)
!= FIL_PAGE_INDEX
|| !page_is_leaf(block->page.frame);
mtr.commit();
if (b) {
} else if (err == DB_TABLESPACE_DELETED) {
if (err == DB_TABLESPACE_DELETED) {
goto tablespace_deleted;
} else {
}
if (!remove) {
continue;
}
}
#ifndef DBUG_OFF
DBUG_EXECUTE_IF("ibuf_merge_corruption", goto work_around;);
if (srv_shutdown_state == SRV_SHUTDOWN_NONE
|| srv_fast_shutdown) {
continue;
}
/* The following code works around a hang when the
change buffer is corrupted, likely due to the race
condition in crash recovery that was fixed in
MDEV-24449. But, it also introduces corruption by
itself in the following scenario:
change buffer is corrupted, likely due to the
failure of ibuf_merge_or_delete_for_page() to
invoke ibuf_delete_recs() if (!bitmap_bits).
It also introduced corruption by itself in the
following scenario:
(1) We merged buffered changes in buf_page_get_gen()
(2) We committed the mini-transaction
......@@ -2324,67 +2390,16 @@ static void ibuf_read_merge_pages(const uint32_t* space_ids,
(5) Other threads buffer changes for that page.
(6) We will wrongly discard those newly buffered changes below.
This code will be available in debug builds, so that
users may try to fix a shutdown hang that occurs due
to a corrupted change buffer. */
To prevent this scenario, we will only invoke this code
on shutdown. A call to ibuf_max_size_update(0) will cause
ibuf_insert_low() to refuse to insert anything into the
change buffer. */
work_around:
/* Prevent an infinite loop, by removing entries from
the change buffer also in the case the bitmap bits were
the change buffer in the case the bitmap bits were
wrongly clear even though buffered changes exist. */
const dtuple_t* tuple = ibuf_search_tuple_build(
space_id, page_nos[i], heap);
loop:
btr_pcur_t pcur;
pcur.btr_cur.page_cur.index = ibuf.index;
ibuf_mtr_start(&mtr);
if (btr_pcur_open(tuple, PAGE_CUR_GE,
BTR_MODIFY_LEAF, &pcur, 0, &mtr)
!= DB_SUCCESS) {
goto done;
}
if (!btr_pcur_is_on_user_rec(&pcur)) {
ut_ad(btr_pcur_is_after_last_on_page(&pcur));
goto done;
}
for (;;) {
ut_ad(btr_pcur_is_on_user_rec(&pcur));
const rec_t* ibuf_rec = btr_pcur_get_rec(&pcur);
if (ibuf_rec_get_space(&mtr, ibuf_rec) != space_id
|| ibuf_rec_get_page_no(&mtr, ibuf_rec)
!= page_nos[i]) {
break;
}
dops[ibuf_rec_get_op_type(&mtr, ibuf_rec)]++;
/* Delete the record from ibuf */
if (ibuf_delete_rec(page_id_t(space_id, page_nos[i]),
&pcur, tuple, &mtr)) {
/* Deletion was pessimistic and mtr
was committed: we start from the
beginning again */
ut_ad(mtr.has_committed());
goto loop;
}
if (btr_pcur_is_after_last_on_page(&pcur)) {
ibuf_mtr_commit(&mtr);
goto loop;
}
}
done:
ibuf_mtr_commit(&mtr);
mem_heap_empty(heap);
#endif
ibuf_delete_recs(page_id_t(space_ids[i], page_nos[i]));
}
#ifndef DBUG_OFF
ibuf_add_ops(ibuf.n_discarded_ops, dops);
mem_heap_free(heap);
#endif
}
/** Contract the change buffer by reading pages to the buffer pool.
......@@ -2454,8 +2469,23 @@ ibuf_merge_space(
{
mtr_t mtr;
btr_pcur_t pcur;
mem_heap_t* heap = mem_heap_create(512);
dtuple_t* tuple = ibuf_search_tuple_build(space, 0, heap);
dfield_t dfield[IBUF_REC_FIELD_METADATA];
dtuple_t tuple {0, IBUF_REC_FIELD_METADATA,
IBUF_REC_FIELD_METADATA,dfield,0,nullptr
#ifdef UNIV_DEBUG
, DATA_TUPLE_MAGIC_N
#endif
};
byte space_id[4];
mach_write_to_4(space_id, space);
dfield_set_data(&dfield[0], space_id, 4);
dfield_set_data(&dfield[1], field_ref_zero, 1);
dfield_set_data(&dfield[2], field_ref_zero, 4);
dtuple_set_types_binary(&tuple, IBUF_REC_FIELD_METADATA);
ulint n_pages = 0;
ut_ad(space < SRV_SPACE_ID_UPPER_BOUND);
......@@ -2466,13 +2496,11 @@ ibuf_merge_space(
/* Position the cursor on the first matching record. */
pcur.btr_cur.page_cur.index = ibuf.index;
dberr_t err = btr_pcur_open(tuple, PAGE_CUR_GE,
dberr_t err = btr_pcur_open(&tuple, PAGE_CUR_GE,
BTR_SEARCH_LEAF, &pcur, 0, &mtr);
ut_ad(err != DB_SUCCESS || page_validate(btr_pcur_get_page(&pcur),
ibuf.index));
mem_heap_free(heap);
ulint sum_sizes = 0;
uint32_t pages[IBUF_MAX_N_PAGES_MERGED];
uint32_t spaces[IBUF_MAX_N_PAGES_MERGED];
......@@ -4165,6 +4193,11 @@ dberr_t ibuf_merge_or_delete_for_page(buf_block_t *block,
ibuf_reset_bitmap(block, page_id, zip_size, &mtr);
ibuf_mtr_commit(&mtr);
bitmap_bits = 0;
if (!block
|| btr_page_get_index_id(block->page.frame)
!= DICT_IBUF_ID_MIN + IBUF_SPACE_ID) {
ibuf_delete_recs(page_id);
}
}
if (!bitmap_bits) {
......@@ -4390,22 +4423,31 @@ in DISCARD TABLESPACE, IMPORT TABLESPACE, or read-ahead.
@param[in] space missing or to-be-discarded tablespace */
void ibuf_delete_for_discarded_space(uint32_t space)
{
mem_heap_t* heap;
btr_pcur_t pcur;
dtuple_t* search_tuple;
const rec_t* ibuf_rec;
mtr_t mtr;
/* Counts for discarded operations. */
ulint dops[IBUF_OP_COUNT];
heap = mem_heap_create(512);
dfield_t dfield[IBUF_REC_FIELD_METADATA];
dtuple_t search_tuple {0,IBUF_REC_FIELD_METADATA,
IBUF_REC_FIELD_METADATA,dfield,0
,nullptr
#ifdef UNIV_DEBUG
,DATA_TUPLE_MAGIC_N
#endif /* UNIV_DEBUG */
};
byte space_id[4];
mach_write_to_4(space_id, space);
dfield_set_data(&dfield[0], space_id, 4);
dfield_set_data(&dfield[1], field_ref_zero, 1);
dfield_set_data(&dfield[2], field_ref_zero, 4);
dtuple_set_types_binary(&search_tuple, IBUF_REC_FIELD_METADATA);
/* Use page number 0 to build the search tuple so that we get the
cursor positioned at the first entry for this space id */
search_tuple = ibuf_search_tuple_build(space, 0, heap);
memset(dops, 0, sizeof(dops));
pcur.btr_cur.page_cur.index = ibuf.index;
......@@ -4415,7 +4457,7 @@ void ibuf_delete_for_discarded_space(uint32_t space)
/* Position pcur in the insert buffer at the first entry for the
space */
if (btr_pcur_open_on_user_rec(search_tuple, PAGE_CUR_GE,
if (btr_pcur_open_on_user_rec(&search_tuple, PAGE_CUR_GE,
BTR_MODIFY_LEAF, &pcur, &mtr)
!= DB_SUCCESS) {
goto leave_loop;
......@@ -4443,7 +4485,7 @@ void ibuf_delete_for_discarded_space(uint32_t space)
/* Delete the record from ibuf */
if (ibuf_delete_rec(page_id_t(space, page_no),
&pcur, search_tuple, &mtr)) {
&pcur, &search_tuple, &mtr)) {
/* Deletion was pessimistic and mtr was committed:
we start from the beginning again */
......@@ -4464,8 +4506,6 @@ void ibuf_delete_for_discarded_space(uint32_t space)
ut_free(pcur.old_rec_buf);
ibuf_add_ops(ibuf.n_discarded_ops, dops);
mem_heap_free(heap);
}
/******************************************************************//**
......@@ -4640,15 +4680,20 @@ dberr_t ibuf_check_bitmap_on_import(const trx_t* trx, fil_space_t* space)
void ibuf_set_bitmap_for_bulk_load(buf_block_t *block, mtr_t *mtr, bool reset)
{
ut_a(page_is_leaf(block->page.frame));
const page_id_t id{block->page.id()};
const auto zip_size= block->zip_size();
if (buf_block_t *bitmap_page=
ibuf_bitmap_get_map_page(block->page.id(), block->zip_size(), mtr))
if (buf_block_t *bitmap_page= ibuf_bitmap_get_map_page(id, zip_size, mtr))
{
if (ibuf_bitmap_page_get_bits(bitmap_page->page.frame, id, zip_size,
IBUF_BITMAP_BUFFERED, mtr))
ibuf_delete_recs(id);
ulint free_val= reset ? 0 : ibuf_index_page_calc_free(block);
/* FIXME: update the bitmap byte only once! */
ibuf_bitmap_page_set_bits<IBUF_BITMAP_FREE>
(bitmap_page, block->page.id(), block->physical_size(), free_val, mtr);
(bitmap_page, id, block->physical_size(), free_val, mtr);
ibuf_bitmap_page_set_bits<IBUF_BITMAP_BUFFERED>
(bitmap_page, block->page.id(), block->physical_size(), false, mtr);
(bitmap_page, id, block->physical_size(), false, mtr);
}
}
......@@ -967,8 +967,9 @@ ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown()
if (srv_fast_shutdown == 2 || !srv_was_started) {
if (!srv_read_only_mode && srv_was_started) {
ib::info() << "Executing innodb_fast_shutdown=2."
" Next startup will execute crash recovery!";
sql_print_information(
"InnoDB: Executing innodb_fast_shutdown=2."
" Next startup will execute crash recovery!");
/* In this fastest shutdown we do not flush the
buffer pool:
......
......@@ -986,6 +986,20 @@ bool recv_sys_t::recover_deferred(recv_sys_t::map::iterator &p,
DB_SUCCESS == os_file_punch_hole(node->handle, 0, 4096) &&
!my_test_if_thinly_provisioned(node->handle);
#endif
/* Mimic fil_node_t::read_page0() in case the file exists and
has already been extended to a larger size. */
ut_ad(node->size == size);
const os_offset_t file_size= os_file_get_size(node->handle);
if (file_size != os_offset_t(-1))
{
const uint32_t n_pages=
uint32_t(file_size / fil_space_t::physical_size(flags));
if (n_pages > size)
{
space->size= node->size= n_pages;
space->set_committed_size();
}
}
if (!os_file_set_size(node->name, node->handle,
(size * fil_space_t::physical_size(flags)) &
~4095ULL, is_sparse))
......@@ -1325,40 +1339,22 @@ static void fil_name_process(const char *name, ulint len, uint32_t space_id,
case FIL_LOAD_INVALID:
ut_ad(space == NULL);
if (srv_force_recovery == 0) {
sql_print_warning(
"InnoDB: We do not continue the crash"
" recovery, because the table may"
" become corrupt if we cannot apply"
" the log records in the InnoDB log to"
" it. To fix the problem and start"
" mariadbd:");
sql_print_information(
"InnoDB: 1) If there is a permission"
" problem in the file and mysqld"
" cannot open the file, you should"
" modify the permissions.");
sql_print_information(
"InnoDB: 2) If the tablespace is not"
" needed, or you can restore an older"
" version from a backup, then you can"
" remove the .ibd file, and use"
" --innodb_force_recovery=1 to force"
" startup without this file.");
sql_print_information(
"InnoDB: 3) If the file system or the"
" disk is broken, and you cannot"
" remove the .ibd file, you can set"
" --innodb_force_recovery.");
sql_print_error("InnoDB: Recovery cannot access"
" file %s (tablespace "
UINT32PF ")", name, space_id);
sql_print_information("InnoDB: You may set "
"innodb_force_recovery=1"
" to ignore this and"
" possibly get a"
" corrupted database.");
recv_sys.set_corrupt_fs();
break;
}
sql_print_information(
"InnoDB: innodb_force_recovery was set to %lu."
" Continuing crash recovery even though"
" we cannot access the files for tablespace "
UINT32PF ".", srv_force_recovery, space_id);
break;
sql_print_warning("InnoDB: Ignoring changes to"
" file %s (tablespace " UINT32PF ")"
" due to innodb_force_recovery",
name, space_id);
}
}
}
......
......@@ -704,7 +704,7 @@ row_mysql_handle_errors(
" table. You have to dump + drop + reimport the"
" table or, in a case of widespread corruption,"
" dump all InnoDB tables and recreate the whole"
" tablespace. If the mysqld server crashes after"
" tablespace. If the mariadbd server crashes after"
" the startup or when you dump the tables. "
<< FORCE_RECOVERY_MSG;
goto rollback_to_savept;
......
......@@ -1525,6 +1525,10 @@ void srv_shutdown(bool ibuf_merge)
if (ibuf_merge) {
srv_main_thread_op_info = "doing insert buffer merge";
/* Disallow the use of change buffer to
avoid a race condition with
ibuf_read_merge_pages() */
ibuf_max_size_update(0);
log_free_check();
n_read = ibuf_contract();
srv_shutdown_print(now, n_read);
......
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2018, MariaDB Corporation.
Copyright (c) 2017, 2022, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment