Commit b59d4846 authored by Marko Mäkelä

MDEV-14126: Remove page_is_root()

The predicate page_is_root(), which was added in MariaDB Server 10.2.2,
is based on a wrong assumption.

Under some circumstances, InnoDB can transform B-trees into a degenerate
state where a non-leaf page has no sibling pages. Because of this,
we cannot assume that a page that has no siblings is the root page.
This bug will be tracked as MDEV-19022.

Because of the bug that may affect many InnoDB data files, we must remove
and replace the wrong predicate. Using the wrong predicate can cause
corruption. A leaf page is not allowed to be empty except if it is the
root page, and the entire table is empty.
parent 71c781bf
......@@ -2,7 +2,7 @@
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
Copyright (c) 2014, 2017, MariaDB Corporation.
Copyright (c) 2014, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -378,8 +378,7 @@ btr_root_adjust_on_import(
page = buf_block_get_frame(block);
page_zip = buf_block_get_page_zip(block);
if (!page_is_root(page)) {
if (!fil_page_index_page_check(page) || page_has_siblings(page)) {
err = DB_CORRUPTION;
} else if (dict_index_is_clust(index)) {
......@@ -1161,11 +1160,11 @@ btr_free_root_check(
buf_block_dbg_add_level(block, SYNC_TREE_NODE);
if (fil_page_index_page_check(block->frame)
&& index_id == btr_page_get_index_id(block->frame)) {
&& index_id == btr_page_get_index_id(block->frame)) {
/* This should be a root page.
It should not be possible to reassign the same
index_id for some other index in the tablespace. */
ut_ad(page_is_root(block->frame));
ut_ad(!page_has_siblings(block->frame));
} else {
block = NULL;
}
......@@ -1358,7 +1357,8 @@ btr_free_but_not_root(
ibool finished;
mtr_t mtr;
ut_ad(page_is_root(block->frame));
ut_ad(fil_page_index_page_check(block->frame));
ut_ad(!page_has_siblings(block->frame));
leaf_loop:
mtr_start(&mtr);
mtr_set_log_mode(&mtr, log_mode);
......@@ -1430,7 +1430,6 @@ btr_free_if_exists(
return;
}
ut_ad(page_is_root(root->frame));
btr_free_but_not_root(root, mtr->get_log_mode());
mtr->set_named_space(page_id.space());
btr_free_root(root, mtr);
......@@ -1453,8 +1452,6 @@ btr_free(
page_id, page_size, RW_X_LATCH, &mtr);
if (block) {
ut_ad(page_is_root(block->frame));
btr_free_but_not_root(block, MTR_LOG_NO_REDO);
btr_free_root(block, &mtr);
}
......@@ -1614,12 +1611,17 @@ btr_page_reorganize_low(
ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
btr_assert_not_corrupted(block, index);
ut_ad(fil_page_index_page_check(block->frame));
ut_ad(index->is_dummy
|| block->page.id.space() == index->space);
ut_ad(index->is_dummy
|| block->page.id.page_no() != index->page
|| !page_has_siblings(page));
#ifdef UNIV_ZIP_DEBUG
ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
data_size1 = page_get_data_size(page);
max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1);
/* Turn logging off */
mtr_log_t log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
......@@ -1677,7 +1679,7 @@ btr_page_reorganize_low(
|| page_get_max_trx_id(page) == 0
|| (dict_index_is_sec_or_ibuf(index)
? page_is_leaf(temp_page)
: page_is_root(temp_page)));
: block->page.id.page_no() == index->page));
/* If innodb_log_compressed_pages is ON, page reorganize should log the
compressed page image.*/
......@@ -1894,6 +1896,8 @@ btr_page_empty(
ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
ut_ad(page_zip == buf_block_get_page_zip(block));
ut_ad(!index->is_dummy);
ut_ad(index->space == block->page.id.space());
#ifdef UNIV_ZIP_DEBUG
ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
......@@ -1906,7 +1910,8 @@ btr_page_empty(
/* Preserve PAGE_ROOT_AUTO_INC when creating a clustered index
root page. */
const ib_uint64_t autoinc
= dict_index_is_clust(index) && page_is_root(page)
= dict_index_is_clust(index)
&& index->page == block->page.id.page_no()
? page_get_autoinc(page)
: 0;
......@@ -4225,6 +4230,8 @@ btr_discard_only_page_on_level(
ulint page_level = 0;
trx_id_t max_trx_id;
ut_ad(!index->is_dummy);
/* Save the PAGE_MAX_TRX_ID from the leaf page. */
max_trx_id = page_get_max_trx_id(buf_block_get_frame(block));
......@@ -4235,11 +4242,10 @@ btr_discard_only_page_on_level(
ut_a(page_get_n_recs(page) == 1);
ut_a(page_level == btr_page_get_level(page, mtr));
ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
ut_a(btr_page_get_next(page, mtr) == FIL_NULL);
ut_ad(mtr_is_block_fix(
mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
ut_a(!page_has_siblings(page));
ut_ad(fil_page_index_page_check(page));
ut_ad(block->page.id.space() == index->space);
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
btr_search_drop_page_hash_index(block);
if (dict_index_is_spatial(index)) {
......@@ -4265,6 +4271,7 @@ btr_discard_only_page_on_level(
/* block is the root page, which must be empty, except
for the node pointer to the (now discarded) block(s). */
ut_ad(!page_has_siblings(block->frame));
#ifdef UNIV_BTR_DEBUG
if (!dict_index_is_ibuf(index)) {
......
......@@ -3,7 +3,7 @@
Copyright (c) 1994, 2018, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Copyright (c) 2012, Facebook Inc.
Copyright (c) 2015, 2018, MariaDB Corporation.
Copyright (c) 2015, 2019, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
......@@ -5113,14 +5113,14 @@ btr_cur_optimistic_delete_func(
ut_ad(flags == 0 || flags == BTR_CREATE_FLAG);
ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
MTR_MEMO_PAGE_X_FIX));
ut_ad(mtr_is_block_fix(mtr, btr_cur_get_block(cursor),
MTR_MEMO_PAGE_X_FIX, cursor->index->table));
ut_ad(mtr->is_named_space(cursor->index->space));
ut_ad(!cursor->index->is_dummy);
/* This is intended only for leaf page deletions */
block = btr_cur_get_block(cursor);
ut_ad(block->page.id.space() == cursor->index->space);
ut_ad(page_is_leaf(buf_block_get_frame(block)));
ut_ad(!dict_index_is_online_ddl(cursor->index)
|| dict_index_is_clust(cursor->index)
......@@ -5242,8 +5242,10 @@ btr_cur_pessimistic_delete(
ut_ad(mtr_memo_contains_flagged(mtr, dict_index_get_lock(index),
MTR_MEMO_X_LOCK
| MTR_MEMO_SX_LOCK));
ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
ut_ad(mtr->is_named_space(index->space));
ut_ad(!index->is_dummy);
ut_ad(block->page.id.space() == index->space);
if (!has_reserved_extents) {
/* First reserve enough free space for the file segments
......
/*****************************************************************************
Copyright (C) 2013, 2014 Facebook, Inc. All Rights Reserved.
Copyright (C) 2014, 2017, MariaDB Corporation.
Copyright (C) 2014, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -181,7 +181,8 @@ btr_defragment_add_index(
return NULL;
}
ut_ad(page_is_root(page));
ut_ad(fil_page_index_page_check(page));
ut_ad(!page_has_siblings(page));
if (page_is_leaf(page)) {
// Index root is a leaf page, no need to defragment.
......
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2013, 2018, MariaDB Corporation.
Copyright (c) 2013, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -684,14 +684,19 @@ ulint
page_rec_get_heap_no(
/*=================*/
const rec_t* rec); /*!< in: the physical record */
/** Determine whether a page is an index root page.
/** Determine whether a page has any siblings.
@param[in] page page frame
@return true if the page is a root page of an index */
UNIV_INLINE
bool
page_is_root(
const page_t* page)
MY_ATTRIBUTE((warn_unused_result));
@return true if the page has any siblings */
inline bool page_has_siblings(const page_t* page)
{
	/* The check below reads FIL_PAGE_PREV and FIL_PAGE_NEXT with a
	single 64-bit load. That only works if the two 32-bit fields are
	adjacent, the pair starts at an offset that is a multiple of 8,
	and FIL_NULL is the all-ones pattern. */
	compile_time_assert(!(FIL_PAGE_PREV % 8));
	compile_time_assert(FIL_PAGE_NEXT == FIL_PAGE_PREV + 4);
	compile_time_assert(FIL_NULL == 0xffffffff);

	/* Both links equal to FIL_NULL <=> all 64 bits are set. */
	const uint64_t	no_siblings = ~uint64_t(0);
	const uint64_t	prev_and_next
		= *reinterpret_cast<const uint64_t*>(page + FIL_PAGE_PREV);

	return prev_and_next != no_siblings;
}
/************************************************************//**
Gets the pointer to the next record on the page.
@return pointer to next record */
......
/*****************************************************************************
Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2016, 2017, MariaDB Corporation.
Copyright (c) 2016, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -91,7 +91,8 @@ UNIV_INLINE
ib_uint64_t
page_get_autoinc(const page_t* page)
{
/* Return the PAGE_ROOT_AUTO_INC field that is stored at
PAGE_HEADER + PAGE_ROOT_AUTO_INC in the given page frame,
as read by mach_read_from_8(). */

/* NOTE(review): page_is_root() is the predicate that this commit
removes; this assertion appears to be the pre-change side of the
diff, superseded by the two assertions that follow -- confirm. */
ut_ad(page_is_root(page));
/* The page is asserted to be an index page that has no sibling
pages. */
ut_ad(fil_page_index_page_check(page));
ut_ad(!page_has_siblings(page));
return(mach_read_from_8(PAGE_HEADER + PAGE_ROOT_AUTO_INC + page));
}
......@@ -277,31 +278,6 @@ page_rec_get_heap_no(
}
}
/** Determine whether a page is an index root page.
@param[in] page page frame
@return true if the page is a root page of an index */
UNIV_INLINE
bool
page_is_root(
	const page_t*	page)
{
	/* The sibling test below reads FIL_PAGE_PREV and FIL_PAGE_NEXT
	as one 64-bit word; these checks guard the layout it relies on. */
#if FIL_PAGE_PREV % 8
# error FIL_PAGE_PREV must be 64-bit aligned
#endif
#if FIL_PAGE_NEXT != FIL_PAGE_PREV + 4
# error FIL_PAGE_NEXT must be adjacent to FIL_PAGE_PREV
#endif
#if FIL_NULL != 0xffffffff
# error FIL_NULL != 0xffffffff
#endif
	/* Anything that is not an index page is never reported as a
	root page. */
	if (!fil_page_index_page_check(page)) {
		return(false);
	}

	/* An index page is reported as a root page when both its PREV
	and NEXT links are FIL_NULL, that is, when it has no siblings.
	Caveat: a non-root page can also lack siblings in a degenerate
	B-tree, so a true result does not prove that the page is the
	root of its index. */
	const ib_uint64_t	prev_and_next
		= *reinterpret_cast<const ib_uint64_t*>(page + FIL_PAGE_PREV);

	return(prev_and_next == IB_UINT64_MAX);
}
/** Determine whether an index page record is a user record.
@param[in] rec record in an index page
@return true if a user record */
......
......@@ -2,7 +2,7 @@
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
Copyright (c) 2018, MariaDB Corporation.
Copyright (c) 2018, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -1990,6 +1990,8 @@ page_parse_copy_rec_list_to_created_page(
return(rec_end);
}
ut_ad(fil_page_index_page_check(block->frame));
while (ptr < rec_end) {
ptr = page_cur_parse_insert_rec(TRUE, ptr, end_ptr,
block, index, mtr);
......@@ -2045,6 +2047,7 @@ page_copy_rec_list_end_to_created_page(
ut_ad(page_dir_get_n_heap(new_page) == PAGE_HEAP_NO_USER_LOW);
ut_ad(page_align(rec) != new_page);
ut_ad(page_rec_is_comp(rec) == page_is_comp(new_page));
ut_ad(fil_page_index_page_check(new_page));
if (page_rec_is_infimum(rec)) {
......
......@@ -2,7 +2,7 @@
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
Copyright (c) 2017, 2018, MariaDB Corporation.
Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -526,6 +526,8 @@ page_create_empty(
page_zip_des_t* page_zip= buf_block_get_page_zip(block);
ut_ad(fil_page_index_page_check(page));
ut_ad(!index->is_dummy);
ut_ad(block->page.id.space() == index->space);
/* Multiple transactions cannot simultaneously operate on the
same temp-table in parallel.
......@@ -536,7 +538,7 @@ page_create_empty(
&& page_is_leaf(page)) {
max_trx_id = page_get_max_trx_id(page);
ut_ad(max_trx_id);
} else if (page_is_root(page)) {
} else if (block->page.id.page_no() == index->page) {
/* Preserve PAGE_ROOT_AUTO_INC. */
max_trx_id = page_get_max_trx_id(page);
} else {
......
......@@ -2,7 +2,7 @@
Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
Copyright (c) 2014, 2018, MariaDB Corporation.
Copyright (c) 2014, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -4727,7 +4727,7 @@ page_zip_reorganize(
clustered index root pages. */
ut_ad(page_get_max_trx_id(page) == 0
|| (dict_index_is_clust(index)
? page_is_root(temp_page)
? !page_has_siblings(temp_page)
: page_is_leaf(temp_page)));
/* Restore logging. */
......
......@@ -668,7 +668,7 @@ dberr_t FetchIndexRootPages::operator()(buf_block_t* block) UNIV_NOTHROW
return set_current_xdes(block->page.id.page_no(), page);
} else if (fil_page_index_page_check(page)
&& !is_free(block->page.id.page_no())
&& page_is_root(page)) {
&& !page_has_siblings(page)) {
index_id_t id = btr_page_get_index_id(page);
......@@ -1834,7 +1834,7 @@ PageConverter::update_index_page(
page, m_page_zip_ptr, m_index->m_srv_index->id, 0);
if (dict_index_is_clust(m_index->m_srv_index)) {
if (page_is_root(page)) {
if (block->page.id.page_no() == m_index->m_srv_index->page) {
/* Preserve the PAGE_ROOT_AUTO_INC. */
} else {
/* Clear PAGE_MAX_TRX_ID so that it can be
......@@ -1854,7 +1854,7 @@ PageConverter::update_index_page(
if (page_is_empty(page)) {
/* Only a root page can be empty. */
if (!page_is_root(page)) {
if (page_has_siblings(page)) {
// TODO: We should relax this and skip secondary
// indexes. Mark them as corrupt because they can
// always be rebuilt.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment