Commit a4fa940b authored by Jan Lindström

MDEV-11336: Enable defragmentation on 10.2 when tests pass

The problem was that we could take page latches in a different
order than what is permitted with an SX-lock. To follow the
latching order defined in WL#6326, acquire index->lock X-latch.
This entitles us to acquire page latches in any order for the index.

btr0btr.cc
	Document latch rules before and after MariaDB 10.2.2

sync0rw.cc
	Document latch compatibility rules better.

btr_defragment_merge_pages
	Fix parameter value.

btr_defragment_thread
	Acquire X-lock to dict_index_t::lock before restoring
	cursor position and continuing defragmentation.

ha_innobase::optimize
	Restore defragment feature.

Testing
	Add GIS-index and FT-index to table being defragmented.

	Defragmentation is not done to GIS-indexes and FT auxiliary
	tables.
parent 4c9d19ee
......@@ -10,14 +10,5 @@
#
##############################################################################
innodb_defragment_fill_factor : MDEV-11336 Fix and enable innodb_defragment
innodb.defrag_mdl-9155 : MDEV-11336 Fix and enable innodb_defragment
innodb.innodb_defrag_concurrent : MDEV-11336 Fix and enable innodb_defragment
innodb.innodb_defrag_stats : MDEV-11336 Fix and enable innodb_defragment
innodb.innodb_defrag_stats_many_tables : MDEV-11336 Fix and enable innodb_defragment
innodb.innodb_defragment : MDEV-11336 Fix and enable innodb_defragment
innodb.innodb_defragment_fill_factor : MDEV-11336 Fix and enable innodb_defragment
innodb.innodb_defragment_small : MDEV-11336 Fix and enable innodb_defragment
innodb.innodb_defrag_binlog : MDEV-11336 Fix and enable innodb_defragment
innodb-wl5980-alter : MDEV-9469 / MDEV-13668 extra crash in 10.2
create-index-debug : MDEV-13680 InnoDB may crash when btr_page_alloc() fails
......@@ -3,7 +3,15 @@ select @@global.innodb_stats_persistent;
@@global.innodb_stats_persistent
0
set global innodb_defragment_stats_accuracy = 80;
CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT, b VARCHAR(256), c INT, KEY second(a, b),KEY third(c)) ENGINE=INNODB;
CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT,
b VARCHAR(256),
c INT,
g GEOMETRY NOT NULL,
t VARCHAR(256),
KEY second(a, b),
KEY third(c),
SPATIAL gk(g),
FULLTEXT INDEX fti(t)) ENGINE=INNODB;
connect con1,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK;
connect con2,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK;
connect con3,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK;
......@@ -40,9 +48,9 @@ count(stat_value) > 0
connection con1;
optimize table t1;;
connection default;
INSERT INTO t1 VALUES (400000, REPEAT('A', 256),300000);;
INSERT INTO t1 VALUES (400000, REPEAT('A', 256),300000, Point(1,1),'More like a test but different.');;
connection con2;
INSERT INTO t1 VALUES (500000, REPEAT('A', 256),400000);;
INSERT INTO t1 VALUES (500000, REPEAT('A', 256),400000, Point(1,1),'Totally different text book.');;
connection con3;
DELETE FROM t1 where a between 1 and 100;;
connection con4;
......@@ -59,6 +67,9 @@ disconnect con4;
optimize table t1;
Table Op Msg_type Msg_text
test.t1 optimize status OK
check table t1 extended;
Table Op Msg_type Msg_text
test.t1 check status OK
select count(*) from t1;
count(*)
15723
......
......@@ -16,7 +16,26 @@ select @@global.innodb_stats_persistent;
set global innodb_defragment_stats_accuracy = 80;
# Create table.
CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT, b VARCHAR(256), c INT, KEY second(a, b),KEY third(c)) ENGINE=INNODB;
#
# TODO: Currently we do not defragment spatial indexes,
# because doing it properly would require
# appropriate logic around the SSN (split
# sequence number).
#
# Also do not defragment auxiliary tables related to FULLTEXT INDEX.
#
# Both types added to this test to make sure they do not cause
# problems.
#
CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT,
b VARCHAR(256),
c INT,
g GEOMETRY NOT NULL,
t VARCHAR(256),
KEY second(a, b),
KEY third(c),
SPATIAL gk(g),
FULLTEXT INDEX fti(t)) ENGINE=INNODB;
connect (con1,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK);
connect (con2,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK);
......@@ -36,7 +55,7 @@ let $i = $data_size;
while ($i)
{
eval
INSERT INTO t1 VALUES ($data_size + 1 - $i, REPEAT('A', 256), $i);
INSERT INTO t1 VALUES ($data_size + 1 - $i, REPEAT('A', 256), $i, Point($i,$i), 'This is a test message.');
dec $i;
}
--enable_query_log
......@@ -69,10 +88,10 @@ connection con1;
--send optimize table t1;
connection default;
--send INSERT INTO t1 VALUES (400000, REPEAT('A', 256),300000);
--send INSERT INTO t1 VALUES (400000, REPEAT('A', 256),300000, Point(1,1),'More like a test but different.');
connection con2;
--send INSERT INTO t1 VALUES (500000, REPEAT('A', 256),400000);
--send INSERT INTO t1 VALUES (500000, REPEAT('A', 256),400000, Point(1,1),'Totally different text book.');
connection con3;
--send DELETE FROM t1 where a between 1 and 100;
......@@ -103,6 +122,7 @@ disconnect con3;
disconnect con4;
optimize table t1;
check table t1 extended;
select count(*) from t1;
select count(*) from t1 force index (second);
......
......@@ -77,22 +77,85 @@ btr_corruption_report(
/*
Latching strategy of the InnoDB B-tree
--------------------------------------
A tree latch protects all non-leaf nodes of the tree. Each node of a tree
also has a latch of its own.
A B-tree operation normally first acquires an S-latch on the tree. It
searches down the tree and releases the tree latch when it has the
leaf node latch. To save CPU time we do not acquire any latch on
non-leaf nodes of the tree during a search, those pages are only bufferfixed.
If an operation needs to restructure the tree, it acquires an X-latch on
the tree before searching to a leaf node. If it needs, for example, to
split a leaf,
(1) InnoDB decides the split point in the leaf,
(2) allocates a new page,
(3) inserts the appropriate node pointer to the first non-leaf level,
(4) releases the tree X-latch,
(5) and then moves records from the leaf to the new allocated page.
Acquisition of node pointer page latches is protected by the index->lock latch.
Before MariaDB 10.2.2, all node pointer pages were protected by index->lock
either in S (shared) or X (exclusive) mode and block->lock was not acquired on
node pointer pages.
After MariaDB 10.2.2, block->lock S-latch or X-latch is used to protect
node pointer pages and obtainment of node pointer page latches is protected by
index->lock.
(0) Definition: B-tree level.
(0.1) The leaf pages of the B-tree are at level 0.
(0.2) The parent of a page at level L has level L+1. (The level of the
root page is equal to the tree height.)
(0.3) The B-tree lock (index->lock) is the parent of the root page and
has a level = tree height + 1.
Index->lock has 3 possible locking modes:
(1) S-latch:
(1.1) All latches for pages must be obtained in descending order of tree level.
(1.2) Before obtaining the first node pointer page latch at a given B-tree
level, parent latch must be held (at level +1 ).
(1.3) If a node pointer page is already latched at the same level
we can only obtain the latch of its right sibling page at the same level.
(1.4) Release of the node pointer page latches must be done in
child-to-parent order. (Prevents deadlocks when index->lock is obtained
in SX mode).
(1.4.1) Level L node pointer page latch can be released only when
no latches at children level, i.e. level < L, are held.
(1.4.2) All latches from node pointer pages must be released so
that no latches are obtained between.
(1.5) [implied by (1.1), (1.2)] Root page latch must be first node pointer
latch obtained.
(2) SX-latch:
In this case rules (1.2) and (1.3) from S-latch case are relaxed and
merged into (2.2) and rule (1.4) is removed. Thus, latch acquisition
can be skipped at some tree levels and latches can be obtained in
a less restricted order.
(2.1) [identical to (1.1)]: All latches for pages must be obtained in descending
order of tree level.
(2.2) When a node pointer latch at level L is obtained,
the left sibling page latch in the same level or some ancestor
page latch (at level > L) must be held.
(2.3) [implied by (2.1), (2.2)] The first node pointer page latch obtained can
be any node pointer page.
(3) X-latch:
Node pointer latches can be obtained in any order.
NOTE: New rules after MariaDB 10.2.2 do not affect the latching rules of leaf pages:
index->lock S-latch is needed in read for the node pointer traversal. When the leaf
level is reached, index->lock can be released (and with the MariaDB 10.2.2 changes, all
node pointer latches). Left to right index traversal in leaf page level can be safely done
by obtaining right sibling leaf page latch and then releasing the old page latch.
Single leaf page modifications (BTR_MODIFY_LEAF) are protected by index->lock
S-latch.
B-tree operations involving page splits or merges (BTR_MODIFY_TREE) and page
allocations are protected by index->lock X-latch.
Node pointers
-------------
......
......@@ -564,7 +564,7 @@ btr_defragment_merge_pages(
page_get_infimum_rec(from_page));
node_ptr = dict_index_build_node_ptr(
index, rec, page_get_page_no(from_page),
heap, level + 1);
heap, level);
btr_insert_on_non_leaf_level(0, index, level+1,
node_ptr, mtr);
}
......@@ -797,11 +797,16 @@ DECLARE_THREAD(btr_defragment_thread)(void*)
now = ut_timer_now();
mtr_start(&mtr);
btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, &mtr);
cursor = btr_pcur_get_btr_cur(pcur);
index = btr_cur_get_index(cursor);
first_block = btr_cur_get_block(cursor);
mtr.set_named_space(index->space);
/* To follow the latching order defined in WL#6326, acquire index->lock X-latch.
This entitles us to acquire page latches in any order for the index. */
mtr_x_lock(&index->lock, &mtr);
/* This will acquire index->lock SX-latch, which per WL#6363 is allowed
when we are already holding the X-latch. */
btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, &mtr);
first_block = btr_cur_get_block(cursor);
last_block = btr_defragment_n_pages(first_block, index,
srv_defragment_n_pages,
......
......@@ -15066,7 +15066,7 @@ ha_innobase::optimize(
calls to OPTIMIZE, which is undesirable. */
/* TODO: Defragment is disabled for now */
if (0) {
if (srv_defragment) {
int err;
err = defragment_table(m_prebuilt->table->name.m_name, NULL, false);
......
......@@ -84,10 +84,15 @@ lock_word < -(X_LOCK_DECR + X_LOCK_HALF_DECR):
2 - (lock_word + X_LOCK_DECR + X_LOCK_HALF_DECR)
LOCK COMPATIBILITY MATRIX
S SX X
S + + -
SX + - -
X - - -
| S|SX| X|
--+--+--+--+
S| +| +| -|
--+--+--+--+
SX| +| -| -|
--+--+--+--+
X| -| -| -|
--+--+--+--+
The lock_word is always read and updated atomically and consistently, so that
it always represents the state of the lock, and the state of the lock changes
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment