Commit 525e79b0 authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-19022: InnoDB fails to cleanup useless B-tree pages

The test case for reproducing MDEV-14126 demonstrates that InnoDB can
end up with an index tree where a non-leaf page has only one child page.

The test case innodb.innodb_bug14676111 demonstrates that such pages
are sometimes unavoidable, because InnoDB does not implement any sort
of B-tree rotation.

But, there is no reason to allow a root page with only one child page.

btr_cur_node_ptr_delete(): Replaces btr_node_ptr_delete().

btr_page_get_father(): Declare globally.

btr_discard_only_page_on_level(): Declare with ATTRIBUTE_COLD.
It turns out that this function is not covered by the
innodb.innodb_bug14676111 test case after all.

btr_discard_page(): If the root page ends up having only one child
page, shrink the tree by invoking btr_lift_page_up().
parent ade0a0e9
...@@ -39,7 +39,7 @@ Table Op Msg_type Msg_text ...@@ -39,7 +39,7 @@ Table Op Msg_type Msg_text
test.t1 analyze status OK test.t1 analyze status OK
select CLUST_INDEX_SIZE from information_schema.INNODB_SYS_TABLESTATS where NAME = 'test/t1'; select CLUST_INDEX_SIZE from information_schema.INNODB_SYS_TABLESTATS where NAME = 'test/t1';
CLUST_INDEX_SIZE CLUST_INDEX_SIZE
5 4
set global innodb_limit_optimistic_insert_debug = 10000; set global innodb_limit_optimistic_insert_debug = 10000;
connection con2; connection con2;
rollback; rollback;
...@@ -50,7 +50,7 @@ Table Op Msg_type Msg_text ...@@ -50,7 +50,7 @@ Table Op Msg_type Msg_text
test.t1 analyze status OK test.t1 analyze status OK
select CLUST_INDEX_SIZE from information_schema.INNODB_SYS_TABLESTATS where NAME = 'test/t1'; select CLUST_INDEX_SIZE from information_schema.INNODB_SYS_TABLESTATS where NAME = 'test/t1';
CLUST_INDEX_SIZE CLUST_INDEX_SIZE
3 2
begin; begin;
insert into t1 values (2); insert into t1 values (2);
rollback; rollback;
...@@ -59,7 +59,7 @@ Table Op Msg_type Msg_text ...@@ -59,7 +59,7 @@ Table Op Msg_type Msg_text
test.t1 analyze status OK test.t1 analyze status OK
select CLUST_INDEX_SIZE from information_schema.INNODB_SYS_TABLESTATS where NAME = 'test/t1'; select CLUST_INDEX_SIZE from information_schema.INNODB_SYS_TABLESTATS where NAME = 'test/t1';
CLUST_INDEX_SIZE CLUST_INDEX_SIZE
2 1
begin; begin;
insert into t1 values (2); insert into t1 values (2);
rollback; rollback;
......
...@@ -1062,18 +1062,13 @@ btr_page_get_father_block( ...@@ -1062,18 +1062,13 @@ btr_page_get_father_block(
return(btr_page_get_father_node_ptr(offsets, heap, cursor, mtr)); return(btr_page_get_father_node_ptr(offsets, heap, cursor, mtr));
} }
/************************************************************//** /** Seek to the parent page of a B-tree page.
Seeks to the upper level node pointer to a page. @param[in,out] index b-tree
It is assumed that mtr holds an x-latch on the tree. */ @param[in] block child page
static @param[in,out] mtr mini-transaction
void @param[out] cursor cursor pointing to the x-latched parent page */
btr_page_get_father( void btr_page_get_father(dict_index_t* index, buf_block_t* block, mtr_t* mtr,
/*================*/ btr_cur_t* cursor)
dict_index_t* index, /*!< in: b-tree index */
buf_block_t* block, /*!< in: child page in the index */
mtr_t* mtr, /*!< in: mtr */
btr_cur_t* cursor) /*!< out: cursor on node pointer record,
its page x-latched */
{ {
mem_heap_t* heap; mem_heap_t* heap;
rec_t* rec rec_t* rec
...@@ -3463,33 +3458,6 @@ btr_set_min_rec_mark( ...@@ -3463,33 +3458,6 @@ btr_set_min_rec_mark(
} }
} }
/*************************************************************//**
Deletes on the upper level the node pointer to a page. */
void
btr_node_ptr_delete(
/*================*/
dict_index_t* index, /*!< in: index tree */
buf_block_t* block, /*!< in: page whose node pointer is deleted */
mtr_t* mtr) /*!< in: mtr */
{
btr_cur_t cursor;
ibool compressed;
dberr_t err;
ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
/* Delete node pointer on father page */
btr_page_get_father(index, block, mtr, &cursor);
compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor,
BTR_CREATE_FLAG, false, mtr);
ut_a(err == DB_SUCCESS);
if (!compressed) {
btr_cur_compress_if_useful(&cursor, FALSE, mtr);
}
}
/*************************************************************//** /*************************************************************//**
If page is the only on its level, this function moves its records to the If page is the only on its level, this function moves its records to the
father page, thus reducing the tree height. father page, thus reducing the tree height.
...@@ -3939,7 +3907,7 @@ btr_compress( ...@@ -3939,7 +3907,7 @@ btr_compress(
lock_rec_free_all_from_discard_page(block); lock_rec_free_all_from_discard_page(block);
lock_mutex_exit(); lock_mutex_exit();
} else { } else {
btr_node_ptr_delete(index, block, mtr); btr_cur_node_ptr_delete(&father_cursor, mtr);
if (!dict_table_is_locking_disabled(index->table)) { if (!dict_table_is_locking_disabled(index->table)) {
lock_update_merge_left( lock_update_merge_left(
merge_block, orig_pred, block); merge_block, orig_pred, block);
...@@ -4217,8 +4185,9 @@ btr_compress( ...@@ -4217,8 +4185,9 @@ btr_compress(
/*************************************************************//** /*************************************************************//**
Discards a page that is the only page on its level. This will empty Discards a page that is the only page on its level. This will empty
the whole B-tree, leaving just an empty root page. This function the whole B-tree, leaving just an empty root page. This function
should never be reached, because btr_compress(), which is invoked in should almost never be reached, because btr_compress(), which is invoked in
delete operations, calls btr_lift_page_up() to flatten the B-tree. */ delete operations, calls btr_lift_page_up() to flatten the B-tree. */
ATTRIBUTE_COLD
static static
void void
btr_discard_only_page_on_level( btr_discard_only_page_on_level(
...@@ -4318,10 +4287,7 @@ btr_discard_page( ...@@ -4318,10 +4287,7 @@ btr_discard_page(
buf_block_t* block; buf_block_t* block;
page_t* page; page_t* page;
rec_t* node_ptr; rec_t* node_ptr;
#ifdef UNIV_DEBUG
btr_cur_t parent_cursor; btr_cur_t parent_cursor;
bool parent_is_different = false;
#endif
block = btr_cur_get_block(cursor); block = btr_cur_get_block(cursor);
index = btr_cur_get_index(cursor); index = btr_cur_get_index(cursor);
...@@ -4337,13 +4303,11 @@ btr_discard_page( ...@@ -4337,13 +4303,11 @@ btr_discard_page(
MONITOR_INC(MONITOR_INDEX_DISCARD); MONITOR_INC(MONITOR_INDEX_DISCARD);
#ifdef UNIV_DEBUG
if (dict_index_is_spatial(index)) { if (dict_index_is_spatial(index)) {
rtr_page_get_father(index, block, mtr, cursor, &parent_cursor); rtr_page_get_father(index, block, mtr, cursor, &parent_cursor);
} else { } else {
btr_page_get_father(index, block, mtr, &parent_cursor); btr_page_get_father(index, block, mtr, &parent_cursor);
} }
#endif
/* Decide the page which will inherit the locks */ /* Decide the page which will inherit the locks */
...@@ -4351,7 +4315,7 @@ btr_discard_page( ...@@ -4351,7 +4315,7 @@ btr_discard_page(
right_page_no = btr_page_get_next(buf_block_get_frame(block), mtr); right_page_no = btr_page_get_next(buf_block_get_frame(block), mtr);
const page_size_t page_size(dict_table_page_size(index->table)); const page_size_t page_size(dict_table_page_size(index->table));
ut_d(bool parent_is_different = false);
if (left_page_no != FIL_NULL) { if (left_page_no != FIL_NULL) {
merge_block = btr_block_get( merge_block = btr_block_get(
page_id_t(space, left_page_no), page_size, page_id_t(space, left_page_no), page_size,
...@@ -4407,15 +4371,9 @@ btr_discard_page( ...@@ -4407,15 +4371,9 @@ btr_discard_page(
} }
if (dict_index_is_spatial(index)) { if (dict_index_is_spatial(index)) {
btr_cur_t father_cursor; rtr_node_ptr_delete(index, &parent_cursor, block, mtr);
/* Since rtr_node_ptr_delete doesn't contain get father
node ptr, so, we need to get father node ptr first and then
delete it. */
rtr_page_get_father(index, block, mtr, cursor, &father_cursor);
rtr_node_ptr_delete(index, &father_cursor, block, mtr);
} else { } else {
btr_node_ptr_delete(index, block, mtr); btr_cur_node_ptr_delete(&parent_cursor, mtr);
} }
/* Remove the page from the level list */ /* Remove the page from the level list */
...@@ -4453,6 +4411,12 @@ btr_discard_page( ...@@ -4453,6 +4411,12 @@ btr_discard_page(
we cannot use btr_check_node_ptr() */ we cannot use btr_check_node_ptr() */
ut_ad(parent_is_different ut_ad(parent_is_different
|| btr_check_node_ptr(index, merge_block, mtr)); || btr_check_node_ptr(index, merge_block, mtr));
if (btr_cur_get_block(&parent_cursor)->page.id.page_no() == index->page
&& !page_has_siblings(btr_cur_get_page(&parent_cursor))
&& page_get_n_recs(btr_cur_get_page(&parent_cursor)) == 1) {
btr_lift_page_up(index, merge_block, mtr);
}
} }
#ifdef UNIV_BTR_PRINT #ifdef UNIV_BTR_PRINT
......
...@@ -5358,8 +5358,10 @@ btr_cur_pessimistic_delete( ...@@ -5358,8 +5358,10 @@ btr_cur_pessimistic_delete(
on the page */ on the page */
ulint level = btr_page_get_level(page, mtr); ulint level = btr_page_get_level(page, mtr);
btr_node_ptr_delete(index, block, mtr); btr_cur_t cursor;
btr_page_get_father(index, block, mtr, &cursor);
btr_cur_node_ptr_delete(&cursor, mtr);
// FIXME: reuse the node_ptr from above
dtuple_t* node_ptr = dict_index_build_node_ptr( dtuple_t* node_ptr = dict_index_build_node_ptr(
index, next_rec, block->page.id.page_no(), index, next_rec, block->page.id.page_no(),
heap, level); heap, level);
...@@ -5428,6 +5430,23 @@ btr_cur_pessimistic_delete( ...@@ -5428,6 +5430,23 @@ btr_cur_pessimistic_delete(
return(ret); return(ret);
} }
/** Delete the node pointer in a parent page.
@param[in,out] parent cursor pointing to parent record
@param[in,out] mtr mini-transaction */
void btr_cur_node_ptr_delete(btr_cur_t* parent, mtr_t* mtr)
{
ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(parent),
MTR_MEMO_PAGE_X_FIX));
dberr_t err;
ibool compressed = btr_cur_pessimistic_delete(&err, TRUE, parent,
BTR_CREATE_FLAG, false,
mtr);
ut_a(err == DB_SUCCESS);
if (!compressed) {
btr_cur_compress_if_useful(parent, FALSE, mtr);
}
}
/*******************************************************************//** /*******************************************************************//**
Adds path information to the cursor for the current page, for which Adds path information to the cursor for the current page, for which
the binary search has been performed. */ the binary search has been performed. */
......
...@@ -484,6 +484,7 @@ btr_defragment_merge_pages( ...@@ -484,6 +484,7 @@ btr_defragment_merge_pages(
ULINT_UNDEFINED); ULINT_UNDEFINED);
} }
} }
btr_cur_t parent;
if (n_recs_to_move == n_recs) { if (n_recs_to_move == n_recs) {
/* The whole page is merged with the previous page, /* The whole page is merged with the previous page,
free it. */ free it. */
...@@ -491,7 +492,8 @@ btr_defragment_merge_pages( ...@@ -491,7 +492,8 @@ btr_defragment_merge_pages(
from_block); from_block);
btr_search_drop_page_hash_index(from_block); btr_search_drop_page_hash_index(from_block);
btr_level_list_remove(space, page_size, (page_t*)from_page, index, mtr); btr_level_list_remove(space, page_size, (page_t*)from_page, index, mtr);
btr_node_ptr_delete(index, from_block, mtr); btr_page_get_father(index, from_block, mtr, &parent);
btr_cur_node_ptr_delete(&parent, mtr);
/* btr_blob_dbg_remove(from_page, index, /* btr_blob_dbg_remove(from_page, index,
"btr_defragment_n_pages"); */ "btr_defragment_n_pages"); */
btr_page_free(index, from_block, mtr); btr_page_free(index, from_block, mtr);
...@@ -509,7 +511,9 @@ btr_defragment_merge_pages( ...@@ -509,7 +511,9 @@ btr_defragment_merge_pages(
lock_update_split_and_merge(to_block, lock_update_split_and_merge(to_block,
orig_pred, orig_pred,
from_block); from_block);
btr_node_ptr_delete(index, from_block, mtr); // FIXME: reuse the node_ptr!
btr_page_get_father(index, from_block, mtr, &parent);
btr_cur_node_ptr_delete(&parent, mtr);
rec = page_rec_get_next( rec = page_rec_get_next(
page_get_infimum_rec(from_page)); page_get_infimum_rec(from_page));
node_ptr = dict_index_build_node_ptr( node_ptr = dict_index_build_node_ptr(
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc. Copyright (c) 2012, Facebook Inc.
Copyright (c) 2014, 2018, MariaDB Corporation. Copyright (c) 2014, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software the terms of the GNU General Public License as published by the Free Software
...@@ -545,14 +545,13 @@ btr_set_min_rec_mark( ...@@ -545,14 +545,13 @@ btr_set_min_rec_mark(
rec_t* rec, /*!< in/out: record */ rec_t* rec, /*!< in/out: record */
mtr_t* mtr) /*!< in: mtr */ mtr_t* mtr) /*!< in: mtr */
MY_ATTRIBUTE((nonnull)); MY_ATTRIBUTE((nonnull));
/*************************************************************//** /** Seek to the parent page of a B-tree page.
Deletes on the upper level the node pointer to a page. */ @param[in,out] index b-tree
void @param[in] block child page
btr_node_ptr_delete( @param[in,out] mtr mini-transaction
/*================*/ @param[out] cursor cursor pointing to the x-latched parent page */
dict_index_t* index, /*!< in: index tree */ void btr_page_get_father(dict_index_t* index, buf_block_t* block, mtr_t* mtr,
buf_block_t* block, /*!< in: page whose node pointer is deleted */ btr_cur_t* cursor)
mtr_t* mtr) /*!< in: mtr */
MY_ATTRIBUTE((nonnull)); MY_ATTRIBUTE((nonnull));
#ifdef UNIV_DEBUG #ifdef UNIV_DEBUG
/************************************************************//** /************************************************************//**
......
/***************************************************************************** /*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, MariaDB Corporation. Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software the terms of the GNU General Public License as published by the Free Software
...@@ -529,6 +529,11 @@ btr_cur_pessimistic_delete( ...@@ -529,6 +529,11 @@ btr_cur_pessimistic_delete(
bool rollback,/*!< in: performing rollback? */ bool rollback,/*!< in: performing rollback? */
mtr_t* mtr) /*!< in: mtr */ mtr_t* mtr) /*!< in: mtr */
MY_ATTRIBUTE((nonnull)); MY_ATTRIBUTE((nonnull));
/** Delete the node pointer in a parent page.
@param[in,out] parent cursor pointing to parent record
@param[in,out] mtr mini-transaction */
void btr_cur_node_ptr_delete(btr_cur_t* parent, mtr_t* mtr)
MY_ATTRIBUTE((nonnull));
/***********************************************************//** /***********************************************************//**
Parses a redo log record of updating a record in-place. Parses a redo log record of updating a record in-place.
@return end of log record or NULL */ @return end of log record or NULL */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment