Commit 2347ffd8 authored by Aleksey Midenkov's avatar Aleksey Midenkov

MDEV-20301 InnoDB's MVCC has O(N^2) behaviors

If there're multiple row versions in InnoDB, reading one row from PK
may have O(N) complexity and reading from secondary keys may have
O(N^2) complexity.

The problem occurs when there are many pending versions of the same
row, meaning that the primary key is the same, but a secondary key is
different.  The slowdown occurs when the secondary index is
traversed. This patch creates a helper class for the function
row_sel_get_clust_rec_for_mysql() which can remember and re-use
cached_clust_rec & cached_old_vers so that rec_get_offsets() does not
need to be called over and over for the clustered record.

Corrections by Kevin Lewis <kevin.lewis@oracle.com>

MDEV-20341 Unstable innodb.innodb_bug14704286

Removed test that tested the ability of interrupting long query which
is not long anymore.
parent 65296123
use test;
drop table if exists t1;
create table t1 (id int primary key, value int, value2 int,
value3 int, index(value,value2)) engine=innodb;
insert into t1 values
(10,10,10,10),(11,11,11,11),(12,12,12,12),(13,13,13,13),(14,14,14,14),
(15,15,15,15),(16,16,16,16),(17,17,17,17),(18,18,18,18),(19,19,19,19),
(20,20,20,20);
connect conn1, localhost, root,,;
connect conn2, localhost, root,,;
connect conn3, localhost, root,,;
connection conn1;
use test;
start transaction with consistent snapshot;
connection conn2;
use test;
CREATE PROCEDURE update_t1()
BEGIN
DECLARE i INT DEFAULT 1;
while (i <= 5000) DO
update test.t1 set value2=value2+1, value3=value3+1 where id=12;
SET i = i + 1;
END WHILE;
END|
set autocommit=0;
CALL update_t1();
select * from t1;
id value value2 value3
10 10 10 10
11 11 11 11
12 12 5012 5012
13 13 13 13
14 14 14 14
15 15 15 15
16 16 16 16
17 17 17 17
18 18 18 18
19 19 19 19
20 20 20 20
set autocommit=1;
select * from t1;
id value value2 value3
10 10 10 10
11 11 11 11
12 12 5012 5012
13 13 13 13
14 14 14 14
15 15 15 15
16 16 16 16
17 17 17 17
18 18 18 18
19 19 19 19
20 20 20 20
connection conn1;
select * from t1 force index(value) where value=12;
connection conn3;
kill query @id;
connection conn1;
ERROR 70100: Query execution was interrupted
connection default;
disconnect conn1;
disconnect conn2;
disconnect conn3;
drop procedure if exists update_t1;
drop table if exists t1;
#
# Bug #84958 InnoDB's MVCC has O(N^2) behaviors
# https://bugs.mysql.com/bug.php?id=84958
#
# Set up the test with a procedure and a function.
#
CREATE PROCEDURE insert_n(start int, end int)
BEGIN
DECLARE i INT DEFAULT start;
WHILE i <= end do
INSERT INTO t1 VALUES (1, 2, 3) ON DUPLICATE KEY UPDATE c = i;
SET i = i + 1;
END WHILE;
END~~
CREATE FUNCTION num_pages_get()
RETURNS INT
BEGIN
DECLARE ret INT;
SELECT variable_value INTO ret
FROM information_schema.global_status
WHERE variable_name = 'innodb_buffer_pool_read_requests';
RETURN ret;
END~~
#
# Create a table with one record in it and start an RR transaction
#
CREATE TABLE t1 (a INT, b INT, c INT, PRIMARY KEY(a,b), KEY (b,c))
ENGINE=InnoDB;
BEGIN;
SELECT * FROM t1;
a b c
#
# Create 100 newer record versions in con2 and con3
#
connect con2, localhost, root,,;
connection con2;
INSERT INTO t1 VALUES (1, 2, 3) ON DUPLICATE KEY UPDATE c = NULL;
CALL insert_n(1, 50);;
connect con3, localhost, root,,;
connection con3;
CALL insert_n(51, 100);;
connection con2;
connection con3;
INSERT INTO t1 VALUES (1, 2, 1) ON DUPLICATE KEY UPDATE c = NULL;
connection default;
#
# Connect to default and record how many pages were accessed
# when selecting the record using the secondary key.
#
SET @num_pages_1 = num_pages_get();
SELECT * FROM t1 force index (b);
a b c
SET @num_pages_2= num_pages_get();
SELECT @num_pages_2 - @num_pages_1 < 500;
@num_pages_2 - @num_pages_1 < 500
1
#
# Commit and show the final record.
#
SELECT * FROM t1;
a b c
SELECT * FROM t1 force index (b);
a b c
COMMIT;
SELECT * FROM t1 force index (b);
a b c
1 2 NULL
SELECT * FROM t1;
a b c
1 2 NULL
CHECK TABLE t1;
Table Op Msg_type Msg_text
test.t1 check status OK
#
# Cleanup
#
disconnect con2;
disconnect con3;
DROP TABLE t1;
DROP PROCEDURE insert_n;
DROP FUNCTION num_pages_get;
--source include/have_innodb.inc
#
# create test-bed to run test
#
use test;
--disable_warnings
drop table if exists t1;
--enable_warnings
create table t1 (id int primary key, value int, value2 int,
value3 int, index(value,value2)) engine=innodb;
insert into t1 values
(10,10,10,10),(11,11,11,11),(12,12,12,12),(13,13,13,13),(14,14,14,14),
(15,15,15,15),(16,16,16,16),(17,17,17,17),(18,18,18,18),(19,19,19,19),
(20,20,20,20);
let $ID= `SELECT @id := CONNECTION_ID()`;
#
# we need multiple connections as we need to keep one connection
# active with trx requesting consistent read.
#
connect (conn1, localhost, root,,);
connect (conn2, localhost, root,,);
connect (conn3, localhost, root,,);
#
# start trx with consistent read
#
connection conn1;
use test;
start transaction with consistent snapshot;
#
# update table such that secondary index is updated.
#
connection conn2;
use test;
delimiter |;
CREATE PROCEDURE update_t1()
BEGIN
DECLARE i INT DEFAULT 1;
while (i <= 5000) DO
update test.t1 set value2=value2+1, value3=value3+1 where id=12;
SET i = i + 1;
END WHILE;
END|
delimiter ;|
set autocommit=0;
CALL update_t1();
select * from t1;
set autocommit=1;
select * from t1;
#
# Now try to fire select query from connection-1 enforcing
# use of secondary index.
#
connection conn1;
let $ID= `SELECT @id := CONNECTION_ID()`;
#--error ER_QUERY_INTERRUPTED
--send
select * from t1 force index(value) where value=12;
#
# select is going to take good time so let's kill query.
#
connection conn3;
let $wait_condition=
select * from information_schema.processlist where state = 'Sending data' and
info = 'select * from t1 force index(value) where value=12';
--source include/wait_condition.inc
let $ignore= `SELECT @id := $ID`;
kill query @id;
#
# reap the value of connection-1
#
connection conn1;
--error ER_QUERY_INTERRUPTED
reap;
#
# clean test-bed.
#
connection default;
disconnect conn1;
disconnect conn2;
disconnect conn3;
drop procedure if exists update_t1;
drop table if exists t1;
--echo #
--echo # Bug #84958 InnoDB's MVCC has O(N^2) behaviors
--echo # https://bugs.mysql.com/bug.php?id=84958
--echo #
--echo # Set up the test with a procedure and a function.
--echo #
--source include/have_innodb.inc
DELIMITER ~~;
CREATE PROCEDURE insert_n(start int, end int)
BEGIN
DECLARE i INT DEFAULT start;
WHILE i <= end do
INSERT INTO t1 VALUES (1, 2, 3) ON DUPLICATE KEY UPDATE c = i;
SET i = i + 1;
END WHILE;
END~~
CREATE FUNCTION num_pages_get()
RETURNS INT
BEGIN
DECLARE ret INT;
SELECT variable_value INTO ret
FROM information_schema.global_status
WHERE variable_name = 'innodb_buffer_pool_read_requests';
RETURN ret;
END~~
DELIMITER ;~~
--echo #
--echo # Create a table with one record in it and start an RR transaction
--echo #
CREATE TABLE t1 (a INT, b INT, c INT, PRIMARY KEY(a,b), KEY (b,c))
ENGINE=InnoDB;
BEGIN;
SELECT * FROM t1;
--echo #
--echo # Create 100 newer record versions in con2 and con3
--echo #
connect (con2, localhost, root,,);
connection con2;
INSERT INTO t1 VALUES (1, 2, 3) ON DUPLICATE KEY UPDATE c = NULL;
--send CALL insert_n(1, 50);
connect (con3, localhost, root,,);
connection con3;
--send CALL insert_n(51, 100);
connection con2;
reap;
connection con3;
reap;
INSERT INTO t1 VALUES (1, 2, 1) ON DUPLICATE KEY UPDATE c = NULL;
connection default;
--echo #
--echo # Connect to default and record how many pages were accessed
--echo # when selecting the record using the secondary key.
--echo #
SET @num_pages_1 = num_pages_get();
SELECT * FROM t1 force index (b);
SET @num_pages_2= num_pages_get();
SELECT @num_pages_2 - @num_pages_1 < 500;
--echo #
--echo # Commit and show the final record.
--echo #
SELECT * FROM t1;
SELECT * FROM t1 force index (b);
COMMIT;
SELECT * FROM t1 force index (b);
SELECT * FROM t1;
CHECK TABLE t1;
--echo #
--echo # Cleanup
--echo #
disconnect con2;
disconnect con3;
DROP TABLE t1;
DROP PROCEDURE insert_n;
DROP FUNCTION num_pages_get;
...@@ -3309,14 +3309,29 @@ row_sel_build_prev_vers_for_mysql( ...@@ -3309,14 +3309,29 @@ row_sel_build_prev_vers_for_mysql(
return(err); return(err);
} }
/** Helper class to cache clust_rec and old_ver */
class Row_sel_get_clust_rec_for_mysql
{
const rec_t *cached_clust_rec;
rec_t *cached_old_vers;
public:
Row_sel_get_clust_rec_for_mysql() :
cached_clust_rec(NULL), cached_old_vers(NULL) {}
dberr_t operator()(row_prebuilt_t *prebuilt, dict_index_t *sec_index,
const rec_t *rec, que_thr_t *thr, const rec_t **out_rec,
ulint **offsets, mem_heap_t **offset_heap,
dtuple_t **vrow, mtr_t *mtr);
};
/*********************************************************************//** /*********************************************************************//**
Retrieves the clustered index record corresponding to a record in a Retrieves the clustered index record corresponding to a record in a
non-clustered index. Does the necessary locking. Used in the MySQL non-clustered index. Does the necessary locking. Used in the MySQL
interface. interface.
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */ @return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */
static MY_ATTRIBUTE((warn_unused_result))
dberr_t dberr_t
row_sel_get_clust_rec_for_mysql( Row_sel_get_clust_rec_for_mysql::operator()(
/*============================*/ /*============================*/
row_prebuilt_t* prebuilt,/*!< in: prebuilt struct in the handle */ row_prebuilt_t* prebuilt,/*!< in: prebuilt struct in the handle */
dict_index_t* sec_index,/*!< in: secondary index where rec resides */ dict_index_t* sec_index,/*!< in: secondary index where rec resides */
...@@ -3508,15 +3523,36 @@ row_sel_get_clust_rec_for_mysql( ...@@ -3508,15 +3523,36 @@ row_sel_get_clust_rec_for_mysql(
clust_rec, clust_index, *offsets, clust_rec, clust_index, *offsets,
trx_get_read_view(trx))) { trx_get_read_view(trx))) {
/* The following call returns 'offsets' associated with if (clust_rec != cached_clust_rec) {
'old_vers' */ /* The following call returns 'offsets' associated with
err = row_sel_build_prev_vers_for_mysql( 'old_vers' */
trx->read_view, clust_index, prebuilt, err = row_sel_build_prev_vers_for_mysql(
clust_rec, offsets, offset_heap, &old_vers, trx->read_view, clust_index, prebuilt,
vrow, mtr); clust_rec, offsets, offset_heap, &old_vers,
vrow, mtr);
if (err != DB_SUCCESS) {
goto err_exit;
}
cached_clust_rec = clust_rec;
cached_old_vers = old_vers;
} else {
err = DB_SUCCESS;
old_vers = cached_old_vers;
/* The offsets need not be same for the latest
version of clust_rec and its old version
old_vers. Re-calculate the offsets for old_vers. */
if (err != DB_SUCCESS || old_vers == NULL) { if (old_vers != NULL) {
*offsets = rec_get_offsets(
old_vers, clust_index, *offsets,
true, ULINT_UNDEFINED, offset_heap);
}
}
if (old_vers == NULL) {
goto err_exit; goto err_exit;
} }
...@@ -4233,6 +4269,7 @@ row_search_mvcc( ...@@ -4233,6 +4269,7 @@ row_search_mvcc(
dtuple_t* vrow = NULL; dtuple_t* vrow = NULL;
const rec_t* result_rec = NULL; const rec_t* result_rec = NULL;
const rec_t* clust_rec; const rec_t* clust_rec;
Row_sel_get_clust_rec_for_mysql row_sel_get_clust_rec_for_mysql;
dberr_t err = DB_SUCCESS; dberr_t err = DB_SUCCESS;
ibool unique_search = FALSE; ibool unique_search = FALSE;
ibool mtr_has_extra_clust_latch = FALSE; ibool mtr_has_extra_clust_latch = FALSE;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment