Commit 525e79b0 authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-19022: InnoDB fails to cleanup useless B-tree pages

The test case for reproducing MDEV-14126 demonstrates that InnoDB can
end up with an index tree where a non-leaf page has only one child page.

The test case innodb.innodb_bug14676111 demonstrates that such pages
are sometimes unavoidable, because InnoDB does not implement any sort
of B-tree rotation.

But, there is no reason to allow a root page with only one child page.

btr_cur_node_ptr_delete(): Replaces btr_node_ptr_delete().

btr_page_get_father(): Declare globally.

btr_discard_only_page_on_level(): Declare with ATTRIBUTE_COLD.
It turns out that this function is not covered by the
innodb.innodb_bug14676111 test case after all.

btr_discard_page(): If the root page ends up having only one child
page, shrink the tree by invoking btr_lift_page_up().
parent ade0a0e9
......@@ -39,7 +39,7 @@ Table Op Msg_type Msg_text
test.t1 analyze status OK
select CLUST_INDEX_SIZE from information_schema.INNODB_SYS_TABLESTATS where NAME = 'test/t1';
CLUST_INDEX_SIZE
5
4
set global innodb_limit_optimistic_insert_debug = 10000;
connection con2;
rollback;
......@@ -50,7 +50,7 @@ Table Op Msg_type Msg_text
test.t1 analyze status OK
select CLUST_INDEX_SIZE from information_schema.INNODB_SYS_TABLESTATS where NAME = 'test/t1';
CLUST_INDEX_SIZE
3
2
begin;
insert into t1 values (2);
rollback;
......@@ -59,7 +59,7 @@ Table Op Msg_type Msg_text
test.t1 analyze status OK
select CLUST_INDEX_SIZE from information_schema.INNODB_SYS_TABLESTATS where NAME = 'test/t1';
CLUST_INDEX_SIZE
2
1
begin;
insert into t1 values (2);
rollback;
......
......@@ -1062,18 +1062,13 @@ btr_page_get_father_block(
return(btr_page_get_father_node_ptr(offsets, heap, cursor, mtr));
}
/************************************************************//**
Seeks to the upper level node pointer to a page.
It is assumed that mtr holds an x-latch on the tree. */
static
void
btr_page_get_father(
/*================*/
dict_index_t* index, /*!< in: b-tree index */
buf_block_t* block, /*!< in: child page in the index */
mtr_t* mtr, /*!< in: mtr */
btr_cur_t* cursor) /*!< out: cursor on node pointer record,
its page x-latched */
/** Seek to the parent page of a B-tree page.
@param[in,out] index b-tree
@param[in] block child page
@param[in,out] mtr mini-transaction
@param[out] cursor cursor pointing to the x-latched parent page */
void btr_page_get_father(dict_index_t* index, buf_block_t* block, mtr_t* mtr,
btr_cur_t* cursor)
{
mem_heap_t* heap;
rec_t* rec
......@@ -3463,33 +3458,6 @@ btr_set_min_rec_mark(
}
}
/*************************************************************//**
Deletes on the upper level the node pointer to a page. */
void
btr_node_ptr_delete(
/*================*/
dict_index_t* index, /*!< in: index tree */
buf_block_t* block, /*!< in: page whose node pointer is deleted */
mtr_t* mtr) /*!< in: mtr */
{
btr_cur_t cursor;
ibool compressed;
dberr_t err;
ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
/* Delete node pointer on father page */
btr_page_get_father(index, block, mtr, &cursor);
compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor,
BTR_CREATE_FLAG, false, mtr);
ut_a(err == DB_SUCCESS);
if (!compressed) {
btr_cur_compress_if_useful(&cursor, FALSE, mtr);
}
}
/*************************************************************//**
If page is the only on its level, this function moves its records to the
father page, thus reducing the tree height.
......@@ -3939,7 +3907,7 @@ btr_compress(
lock_rec_free_all_from_discard_page(block);
lock_mutex_exit();
} else {
btr_node_ptr_delete(index, block, mtr);
btr_cur_node_ptr_delete(&father_cursor, mtr);
if (!dict_table_is_locking_disabled(index->table)) {
lock_update_merge_left(
merge_block, orig_pred, block);
......@@ -4217,8 +4185,9 @@ btr_compress(
/*************************************************************//**
Discards a page that is the only page on its level. This will empty
the whole B-tree, leaving just an empty root page. This function
should never be reached, because btr_compress(), which is invoked in
should almost never be reached, because btr_compress(), which is invoked in
delete operations, calls btr_lift_page_up() to flatten the B-tree. */
ATTRIBUTE_COLD
static
void
btr_discard_only_page_on_level(
......@@ -4318,10 +4287,7 @@ btr_discard_page(
buf_block_t* block;
page_t* page;
rec_t* node_ptr;
#ifdef UNIV_DEBUG
btr_cur_t parent_cursor;
bool parent_is_different = false;
#endif
block = btr_cur_get_block(cursor);
index = btr_cur_get_index(cursor);
......@@ -4337,13 +4303,11 @@ btr_discard_page(
MONITOR_INC(MONITOR_INDEX_DISCARD);
#ifdef UNIV_DEBUG
if (dict_index_is_spatial(index)) {
rtr_page_get_father(index, block, mtr, cursor, &parent_cursor);
} else {
btr_page_get_father(index, block, mtr, &parent_cursor);
}
#endif
/* Decide the page which will inherit the locks */
......@@ -4351,7 +4315,7 @@ btr_discard_page(
right_page_no = btr_page_get_next(buf_block_get_frame(block), mtr);
const page_size_t page_size(dict_table_page_size(index->table));
ut_d(bool parent_is_different = false);
if (left_page_no != FIL_NULL) {
merge_block = btr_block_get(
page_id_t(space, left_page_no), page_size,
......@@ -4407,15 +4371,9 @@ btr_discard_page(
}
if (dict_index_is_spatial(index)) {
btr_cur_t father_cursor;
/* Since rtr_node_ptr_delete doesn't contain get father
node ptr, so, we need to get father node ptr first and then
delete it. */
rtr_page_get_father(index, block, mtr, cursor, &father_cursor);
rtr_node_ptr_delete(index, &father_cursor, block, mtr);
rtr_node_ptr_delete(index, &parent_cursor, block, mtr);
} else {
btr_node_ptr_delete(index, block, mtr);
btr_cur_node_ptr_delete(&parent_cursor, mtr);
}
/* Remove the page from the level list */
......@@ -4453,6 +4411,12 @@ btr_discard_page(
we cannot use btr_check_node_ptr() */
ut_ad(parent_is_different
|| btr_check_node_ptr(index, merge_block, mtr));
if (btr_cur_get_block(&parent_cursor)->page.id.page_no() == index->page
&& !page_has_siblings(btr_cur_get_page(&parent_cursor))
&& page_get_n_recs(btr_cur_get_page(&parent_cursor)) == 1) {
btr_lift_page_up(index, merge_block, mtr);
}
}
#ifdef UNIV_BTR_PRINT
......
......@@ -5358,8 +5358,10 @@ btr_cur_pessimistic_delete(
on the page */
ulint level = btr_page_get_level(page, mtr);
btr_node_ptr_delete(index, block, mtr);
btr_cur_t cursor;
btr_page_get_father(index, block, mtr, &cursor);
btr_cur_node_ptr_delete(&cursor, mtr);
// FIXME: reuse the node_ptr from above
dtuple_t* node_ptr = dict_index_build_node_ptr(
index, next_rec, block->page.id.page_no(),
heap, level);
......@@ -5428,6 +5430,23 @@ btr_cur_pessimistic_delete(
return(ret);
}
/** Delete the node pointer in a parent page.
@param[in,out] parent cursor pointing to parent record
@param[in,out] mtr mini-transaction */
void btr_cur_node_ptr_delete(btr_cur_t* parent, mtr_t* mtr)
{
ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(parent),
MTR_MEMO_PAGE_X_FIX));
dberr_t err;
ibool compressed = btr_cur_pessimistic_delete(&err, TRUE, parent,
BTR_CREATE_FLAG, false,
mtr);
ut_a(err == DB_SUCCESS);
if (!compressed) {
btr_cur_compress_if_useful(parent, FALSE, mtr);
}
}
/*******************************************************************//**
Adds path information to the cursor for the current page, for which
the binary search has been performed. */
......
......@@ -484,6 +484,7 @@ btr_defragment_merge_pages(
ULINT_UNDEFINED);
}
}
btr_cur_t parent;
if (n_recs_to_move == n_recs) {
/* The whole page is merged with the previous page,
free it. */
......@@ -491,7 +492,8 @@ btr_defragment_merge_pages(
from_block);
btr_search_drop_page_hash_index(from_block);
btr_level_list_remove(space, page_size, (page_t*)from_page, index, mtr);
btr_node_ptr_delete(index, from_block, mtr);
btr_page_get_father(index, from_block, mtr, &parent);
btr_cur_node_ptr_delete(&parent, mtr);
/* btr_blob_dbg_remove(from_page, index,
"btr_defragment_n_pages"); */
btr_page_free(index, from_block, mtr);
......@@ -509,7 +511,9 @@ btr_defragment_merge_pages(
lock_update_split_and_merge(to_block,
orig_pred,
from_block);
btr_node_ptr_delete(index, from_block, mtr);
// FIXME: reuse the node_ptr!
btr_page_get_father(index, from_block, mtr, &parent);
btr_cur_node_ptr_delete(&parent, mtr);
rec = page_rec_get_next(
page_get_infimum_rec(from_page));
node_ptr = dict_index_build_node_ptr(
......
......@@ -2,7 +2,7 @@
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
Copyright (c) 2014, 2018, MariaDB Corporation.
Copyright (c) 2014, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -545,14 +545,13 @@ btr_set_min_rec_mark(
rec_t* rec, /*!< in/out: record */
mtr_t* mtr) /*!< in: mtr */
MY_ATTRIBUTE((nonnull));
/*************************************************************//**
Deletes on the upper level the node pointer to a page. */
void
btr_node_ptr_delete(
/*================*/
dict_index_t* index, /*!< in: index tree */
buf_block_t* block, /*!< in: page whose node pointer is deleted */
mtr_t* mtr) /*!< in: mtr */
/** Seek to the parent page of a B-tree page.
@param[in,out] index b-tree
@param[in] block child page
@param[in,out] mtr mini-transaction
@param[out] cursor cursor pointing to the x-latched parent page */
void btr_page_get_father(dict_index_t* index, buf_block_t* block, mtr_t* mtr,
btr_cur_t* cursor)
MY_ATTRIBUTE((nonnull));
#ifdef UNIV_DEBUG
/************************************************************//**
......
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, MariaDB Corporation.
Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -529,6 +529,11 @@ btr_cur_pessimistic_delete(
bool rollback,/*!< in: performing rollback? */
mtr_t* mtr) /*!< in: mtr */
MY_ATTRIBUTE((nonnull));
/** Delete the node pointer in a parent page.
@param[in,out] parent cursor pointing to parent record
@param[in,out] mtr mini-transaction */
void btr_cur_node_ptr_delete(btr_cur_t* parent, mtr_t* mtr)
MY_ATTRIBUTE((nonnull));
/***********************************************************//**
Parses a redo log record of updating a record in-place.
@return end of log record or NULL */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment