Commit 222e800e authored by Vlad Lesin

MDEV-21136 InnoDB's records_in_range estimates can be way off

Get rid of BTR_ESTIMATE and btr_cur_t::path_arr.

Before the fix, btr_estimate_n_rows_in_range_low() used two
btr_cur_search_to_nth_level() calls to create two arrays of tree paths,
one array per border. It then tried to estimate the number of rows by
diving level by level through the array elements. Because the path pages
were unlatched while the arrays were being iterated, the tree could be
modified, and the estimation function called itself again until the
allowed number of attempts was exceeded.

After the fix, the estimation happens during the search process. Roughly,
the algorithm is the following: dive into the left page; then, if there are
pages between the left and right ones, read a few pages to the right. If the
right page is reached, fetch it and count the exact number of rows;
otherwise count the estimated number of rows and fetch the right page.

The latching order corresponds to WL#6326 rules, i.e.:

(2.1) [same as (1.1)]: Page latches must be acquired in descending order
of tree level.

(2.2) When acquiring a node pointer page latch at level L, we must hold
the left sibling page latch (at level L) or some ancestor latch
(at level>L).

When we dive one level down, the parent page is unlatched only after
the current-level page is latched. When we estimate the number of rows
on some level, we latch the left border, then fetch the next page, and
keep fetching next pages, unlatching the previous page only after the
current page is latched, until the right border is reached. That is, the
left sibling is always latched when we acquire a page latch on the same
level. When we reach the right border, the current page is unlatched, and
then the right border is latched. According to rule (2.2), we can do this
because the right border's parent is latched.
parent 6156a2be
This diff is collapsed.
...@@ -129,10 +129,6 @@ enum btr_latch_mode { ...@@ -129,10 +129,6 @@ enum btr_latch_mode {
BTR_PURGE_TREE = BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE BTR_PURGE_TREE = BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE
}; };
/** This flag ORed to btr_latch_mode says that we do the search in query
optimization */
#define BTR_ESTIMATE 1024U
/** This flag ORed to BTR_INSERT says that we can ignore possible /** This flag ORed to BTR_INSERT says that we can ignore possible
UNIQUE definition on secondary indexes when we decide if we can use UNIQUE definition on secondary indexes when we decide if we can use
the insert buffer to speed up inserts */ the insert buffer to speed up inserts */
...@@ -160,7 +156,6 @@ record is in spatial index */ ...@@ -160,7 +156,6 @@ record is in spatial index */
| BTR_RTREE_UNDO_INS \ | BTR_RTREE_UNDO_INS \
| BTR_RTREE_DELETE_MARK \ | BTR_RTREE_DELETE_MARK \
| BTR_DELETE \ | BTR_DELETE \
| BTR_ESTIMATE \
| BTR_IGNORE_SEC_UNIQUE \ | BTR_IGNORE_SEC_UNIQUE \
| BTR_ALREADY_S_LATCHED \ | BTR_ALREADY_S_LATCHED \
| BTR_LATCH_FOR_INSERT \ | BTR_LATCH_FOR_INSERT \
......
...@@ -165,7 +165,7 @@ btr_cur_search_to_nth_level_func( ...@@ -165,7 +165,7 @@ btr_cur_search_to_nth_level_func(
search the position! */ search the position! */
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ..., ORed with ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ..., ORed with
at most one of BTR_INSERT, BTR_DELETE_MARK, at most one of BTR_INSERT, BTR_DELETE_MARK,
BTR_DELETE, or BTR_ESTIMATE; BTR_DELETE;
cursor->left_block is used to store a pointer cursor->left_block is used to store a pointer
to the left neighbor page, in the cases to the left neighbor page, in the cases
BTR_SEARCH_PREV and BTR_MODIFY_PREV; BTR_SEARCH_PREV and BTR_MODIFY_PREV;
...@@ -531,16 +531,20 @@ struct btr_pos_t ...@@ -531,16 +531,20 @@ struct btr_pos_t
page_id_t page_id; /* Out: Page where we found the tuple */ page_id_t page_id; /* Out: Page where we found the tuple */
}; };
/** Estimates the number of rows in a given index range. /** Estimates the number of rows in a given index range. Do search in the
@param[in] index index left page, then if there are pages between left and right ones, read a few
@param[in/out] range_start pages to the right, if the right page is reached, fetch it and count the exact
@param[in/out] range_ end number of rows, otherwise count the estimated (see
@return estimated number of rows */ btr_estimate_n_rows_in_range_on_level() for details) number of rows, and
ha_rows fetch the right page. If leaves are reached, unlatch non-leaf pages except
btr_estimate_n_rows_in_range( the right leaf parent. After the right leaf page is fetched, commit mtr.
dict_index_t* index, @param[in] index index
btr_pos_t* range_start, @param[in] range_start range start
btr_pos_t* range_end); @param[in] range_end range end
@return estimated number of rows; */
ha_rows btr_estimate_n_rows_in_range(dict_index_t *index,
btr_pos_t *range_start,
btr_pos_t *range_end);
/** Gets the externally stored size of a record, in units of a database page. /** Gets the externally stored size of a record, in units of a database page.
@param[in] rec record @param[in] rec record
......
...@@ -54,14 +54,11 @@ page_zip_des_t* ...@@ -54,14 +54,11 @@ page_zip_des_t*
page_cur_get_page_zip( page_cur_get_page_zip(
/*==================*/ /*==================*/
page_cur_t* cur); /*!< in: page cursor */ page_cur_t* cur); /*!< in: page cursor */
/*********************************************************//** /* Gets the record where the cursor is positioned.
Gets the record where the cursor is positioned. @param cur page cursor
@return record */ @return record */
UNIV_INLINE UNIV_INLINE
rec_t* rec_t *page_cur_get_rec(const page_cur_t *cur);
page_cur_get_rec(
/*=============*/
page_cur_t* cur); /*!< in: page cursor */
#else /* UNIV_DEBUG */ #else /* UNIV_DEBUG */
# define page_cur_get_page(cur) page_align((cur)->rec) # define page_cur_get_page(cur) page_align((cur)->rec)
# define page_cur_get_block(cur) (cur)->block # define page_cur_get_block(cur) (cur)->block
......
...@@ -63,14 +63,11 @@ page_cur_get_page_zip( ...@@ -63,14 +63,11 @@ page_cur_get_page_zip(
return(buf_block_get_page_zip(page_cur_get_block(cur))); return(buf_block_get_page_zip(page_cur_get_block(cur)));
} }
/*********************************************************//** /* Gets the record where the cursor is positioned.
Gets the record where the cursor is positioned. @param cur page cursor
@return record */ @return record */
UNIV_INLINE UNIV_INLINE
rec_t* rec_t *page_cur_get_rec(const page_cur_t *cur)
page_cur_get_rec(
/*=============*/
page_cur_t* cur) /*!< in: page cursor */
{ {
ut_ad(cur); ut_ad(cur);
ut_ad(!cur->rec || page_align(cur->rec) == cur->block->page.frame); ut_ad(!cur->rec || page_align(cur->rec) == cur->block->page.frame);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment