Commit 16e197f5 authored by Sergey Petrunya

MWL#121: DS-MRR support for clustered primary keys

- Add testcases
- Code cleanup: garbage removal, better comments, make members private where possible
parent 82f8ed17
drop table if exists t0,t1,t2,t3;
set @save_join_cache_level=@@join_cache_level;
set join_cache_level=6;
set @save_storage_engine=@@storage_engine;
set storage_engine=innodb;
create table t0(a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t1(a char(8), b char(8), filler char(100), primary key(a));
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` char(8) NOT NULL DEFAULT '',
`b` char(8) DEFAULT NULL,
`filler` char(100) DEFAULT NULL,
PRIMARY KEY (`a`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1
insert into t1 select
concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
concat('b-', 1000 + A.a + B.a*10 + C.a*100, '=B'),
'filler'
from t0 A, t0 B, t0 C;
create table t2 (a char(8));
insert into t2 values ('a-1010=A'), ('a-1030=A'), ('a-1020=A');
This should use join buffer:
explain select * from t1, t2 where t1.a=t2.a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 3
1 SIMPLE t1 eq_ref PRIMARY PRIMARY 8 test.t2.a 1 Using join buffer
This output must be sorted by value of t1.a:
select * from t1, t2 where t1.a=t2.a;
a b filler a
a-1010=A b-1010=B filler a-1010=A
a-1020=A b-1020=B filler a-1020=A
a-1030=A b-1030=B filler a-1030=A
drop table t1, t2;
create table t1(
a char(8) character set utf8, b int, filler char(100),
primary key(a,b)
);
insert into t1 select
concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
1000 + A.a + B.a*10 + C.a*100,
'filler'
from t0 A, t0 B, t0 C;
create table t2 (a char(8) character set utf8, b int);
insert into t2 values ('a-1010=A', 1010), ('a-1030=A', 1030), ('a-1020=A', 1020);
explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 3
1 SIMPLE t1 eq_ref PRIMARY PRIMARY 28 test.t2.a,test.t2.b 1 Using join buffer
select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
a b filler a b
a-1010=A 1010 filler a-1010=A 1010
a-1020=A 1020 filler a-1020=A 1020
a-1030=A 1030 filler a-1030=A 1030
insert into t2 values ('a-1030=A', 1030), ('a-1020=A', 1020);
explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 5
1 SIMPLE t1 eq_ref PRIMARY PRIMARY 28 test.t2.a,test.t2.b 1 Using join buffer
select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
a b filler a b
a-1010=A 1010 filler a-1010=A 1010
a-1020=A 1020 filler a-1020=A 1020
a-1020=A 1020 filler a-1020=A 1020
a-1030=A 1030 filler a-1030=A 1030
a-1030=A 1030 filler a-1030=A 1030
drop table t1, t2;
create table t1(
a varchar(8) character set utf8, b int, filler char(100),
primary key(a,b)
);
insert into t1 select
concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
1000 + A.a + B.a*10 + C.a*100,
'filler'
from t0 A, t0 B, t0 C;
create table t2 (a char(8) character set utf8, b int);
insert into t2 values ('a-1010=A', 1010), ('a-1030=A', 1030), ('a-1020=A', 1020);
explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 3
1 SIMPLE t1 eq_ref PRIMARY PRIMARY 30 test.t2.a,test.t2.b 1 Using index condition(BKA); Using join buffer
select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
a b filler a b
a-1010=A 1010 filler a-1010=A 1010
a-1020=A 1020 filler a-1020=A 1020
a-1030=A 1030 filler a-1030=A 1030
explain select * from t1, t2 where t1.a=t2.a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 3
1 SIMPLE t1 ref PRIMARY PRIMARY 26 test.t2.a 1 Using index condition(BKA); Using join buffer
select * from t1, t2 where t1.a=t2.a;
a b filler a b
a-1010=A 1010 filler a-1010=A 1010
a-1020=A 1020 filler a-1020=A 1020
a-1030=A 1030 filler a-1030=A 1030
drop table t1, t2;
create table t1 (a int, b int, c int, filler char(100), primary key(a,b,c));
insert into t1 select A.a, B.a, C.a, 'filler' from t0 A, t0 B, t0 C;
insert into t1 values (11, 11, 11, 'filler');
insert into t1 values (11, 11, 12, 'filler');
insert into t1 values (11, 11, 13, 'filler');
insert into t1 values (11, 22, 1234, 'filler');
insert into t1 values (11, 33, 124, 'filler');
insert into t1 values (11, 33, 125, 'filler');
create table t2 (a int, b int);
insert into t2 values (11,33), (11,22), (11,11);
explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 3
1 SIMPLE t1 ref PRIMARY PRIMARY 8 test.t2.a,test.t2.b 1 Using join buffer
select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
a b c filler a b
11 11 11 filler 11 11
11 11 12 filler 11 11
11 11 13 filler 11 11
11 22 1234 filler 11 22
11 33 124 filler 11 33
11 33 125 filler 11 33
set join_cache_level=0;
select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
a b c filler a b
11 33 124 filler 11 33
11 33 125 filler 11 33
11 22 1234 filler 11 22
11 11 11 filler 11 11
11 11 12 filler 11 11
11 11 13 filler 11 11
set join_cache_level=6;
drop table t1,t2;
set @@join_cache_level= @save_join_cache_level;
set storage_engine=@save_storage_engine;
drop table t0;
#
# Tests for DS-MRR over clustered primary key. The only engine that supports
# this is InnoDB/XtraDB.
#
# Basic idea about testing
# - DS-MRR/CPK works only with BKA
# - Should also test index condition pushdown
# - Should also test whatever uses RANGE_SEQ_IF::skip_record() for filtering
# - Also test access using prefix of primary key
#
# - Ignore the cost model: BKA's multi_range_read_info() call passes 10 as the
# number of rows; the call is made only as an applicability check
#
-- source include/have_innodb.inc
--disable_warnings
drop table if exists t0,t1,t2,t3;
--enable_warnings
# Save the session settings changed by this test; both are restored at the
# end of the file.
set @save_join_cache_level=@@join_cache_level;
# NOTE(review): level 6 is presumably a BKA join cache level (the header says
# DS-MRR/CPK works only with BKA) - confirm against the server documentation.
set join_cache_level=6;
set @save_storage_engine=@@storage_engine;
# Make the tables created below use InnoDB.
set storage_engine=innodb;
# t0 holds the digits 0..9. It is cross-joined with itself three times below
# to generate 1000 rows (values 1000..1999).
create table t0(a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
# Case 1: single-column CHAR primary key.
create table t1(a char(8), b char(8), filler char(100), primary key(a));
show create table t1;
insert into t1 select
concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
concat('b-', 1000 + A.a + B.a*10 + C.a*100, '=B'),
'filler'
from t0 A, t0 B, t0 C;
# The lookup keys are inserted out of key order (1010, 1030, 1020), so the
# sorted output checked below cannot come from t2's insertion order.
create table t2 (a char(8));
insert into t2 values ('a-1010=A'), ('a-1030=A'), ('a-1020=A');
--echo This should use join buffer:
explain select * from t1, t2 where t1.a=t2.a;
--echo This output must be sorted by value of t1.a:
select * from t1, t2 where t1.a=t2.a;
drop table t1, t2;
# Try multi-column indexes
# Case 2: two-column primary key (utf8 CHAR, INT); lookups use both columns.
create table t1(
a char(8) character set utf8, b int, filler char(100),
primary key(a,b)
);
insert into t1 select
concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
1000 + A.a + B.a*10 + C.a*100,
'filler'
from t0 A, t0 B, t0 C;
create table t2 (a char(8) character set utf8, b int);
insert into t2 values ('a-1010=A', 1010), ('a-1030=A', 1030), ('a-1020=A', 1020);
explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
# Try with dataset that causes identical lookup keys:
# (the two rows added here repeat keys that are already present in t2)
insert into t2 values ('a-1030=A', 1030), ('a-1020=A', 1020);
explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
drop table t1, t2;
# Case 3: same as case 2, but the first key column of t1 is VARCHAR while the
# lookup column in t2 stays CHAR.
create table t1(
a varchar(8) character set utf8, b int, filler char(100),
primary key(a,b)
);
insert into t1 select
concat('a-', 1000 + A.a + B.a*10 + C.a*100, '=A'),
1000 + A.a + B.a*10 + C.a*100,
'filler'
from t0 A, t0 B, t0 C;
create table t2 (a char(8) character set utf8, b int);
insert into t2 values ('a-1010=A', 1010), ('a-1030=A', 1030), ('a-1020=A', 1020);
explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
#
# Try scanning on a CPK prefix
#
# (only the first primary key column, t1.a, is used for the lookup)
explain select * from t1, t2 where t1.a=t2.a;
select * from t1, t2 where t1.a=t2.a;
drop table t1, t2;
#
# The above example is not very interesting, as CPK prefix has
# only one match. Create a dataset where scan on CPK prefix
# would produce multiple matches:
#
create table t1 (a int, b int, c int, filler char(100), primary key(a,b,c));
insert into t1 select A.a, B.a, C.a, 'filler' from t0 A, t0 B, t0 C;
# Rows sharing an (a,b) prefix: three for (11,11), one for (11,22) and two
# for (11,33).
insert into t1 values (11, 11, 11, 'filler');
insert into t1 values (11, 11, 12, 'filler');
insert into t1 values (11, 11, 13, 'filler');
insert into t1 values (11, 22, 1234, 'filler');
insert into t1 values (11, 33, 124, 'filler');
insert into t1 values (11, 33, 125, 'filler');
# t2 supplies (a,b) pairs, i.e. a two-column prefix of the primary key
# (a,b,c), inserted in descending order of b.
create table t2 (a int, b int);
insert into t2 values (11,33), (11,22), (11,11);
explain select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
# Run the same query with join_cache_level=0 for comparison.
# NOTE(review): presumably level 0 turns join buffering off - confirm.
set join_cache_level=0;
select * from t1, t2 where t1.a=t2.a and t1.b=t2.b;
set join_cache_level=6;
drop table t1,t2;
#
# Check that Index Condition Pushdown (BKA) actually works:
#
# TODO: not implemented yet.
#
# Check that record-check-func is done:
#
# TODO: not implemented yet (no statements follow this header).
#
# Cleanup: restore the session settings saved at the top and drop the helper
# table.
set @@join_cache_level= @save_join_cache_level;
set storage_engine=@save_storage_engine;
drop table t0;
...@@ -139,8 +139,13 @@ ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows, ...@@ -139,8 +139,13 @@ ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
uint key_parts, uint *bufsz, uint key_parts, uint *bufsz,
uint *flags, COST_VECT *cost) uint *flags, COST_VECT *cost)
{ {
/*
  Currently we expect this function to be called only in preparation of scan
  with HA_MRR_SINGLE_POINT property.

  The flag has to be tested with '&': '*flags | HA_MRR_SINGLE_POINT' is
  non-zero for every value of *flags, so an assertion written that way can
  never fire.
*/
DBUG_ASSERT(*flags & HA_MRR_SINGLE_POINT);
*bufsz= 0; /* Default implementation doesn't need a buffer */ *bufsz= 0; /* Default implementation doesn't need a buffer */
//psergey2-todo: assert for singlepoint ranges here?
*flags |= HA_MRR_USE_DEFAULT_IMPL; *flags |= HA_MRR_USE_DEFAULT_IMPL;
cost->zero(); cost->zero();
...@@ -323,22 +328,25 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, ...@@ -323,22 +328,25 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
n_ranges, key_parts, mode, buf); n_ranges, key_parts, mode, buf);
DBUG_RETURN(retval); DBUG_RETURN(retval);
} }
rowids_buf= buf->buffer; mrr_buf= buf->buffer;
is_mrr_assoc= !test(mode & HA_MRR_NO_ASSOCIATION); is_mrr_assoc= !test(mode & HA_MRR_NO_ASSOCIATION);
if (is_mrr_assoc) if (is_mrr_assoc)
status_var_increment(table->in_use->status_var.ha_multi_range_read_init_count); status_var_increment(table->in_use->status_var.ha_multi_range_read_init_count);
rowids_buf_end= buf->buffer_end; mrr_buf_end= buf->buffer_end;
doing_cpk_scan= check_cpk_scan(h->active_index, mode); doing_cpk_scan= check_cpk_scan(h->active_index, mode);
if (doing_cpk_scan) if (doing_cpk_scan)
{ {
/*
When doing a scan on CPK, the buffer stores {lookup_tuple, range_id}
pairs
*/
uint keylen=0; uint keylen=0;
DBUG_ASSERT(key_parts != 0); DBUG_ASSERT(key_parts != 0);
//psergey2-todo: new elem_size here
for (uint kp= 0; kp < key_parts; kp++) for (uint kp= 0; kp < key_parts; kp++)
keylen += table->key_info[h->active_index].key_part[kp].store_length; keylen += table->key_info[h->active_index].key_part[kp].store_length;
...@@ -350,12 +358,29 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, ...@@ -350,12 +358,29 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
use_default_impl= FALSE; use_default_impl= FALSE;
} }
else else
{
/* In regular DS-MRR, buffer stores {rowid, range_id} pairs */
elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*); elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
}
rowids_buf_last= rowids_buf + mrr_buf_last= mrr_buf +
((rowids_buf_end - rowids_buf)/ elem_size)* ((mrr_buf_end - mrr_buf)/ elem_size)*
elem_size; elem_size;
rowids_buf_end= rowids_buf_last; mrr_buf_end= mrr_buf_last;
if (doing_cpk_scan)
{
/*
DS-MRR/CPK: fill buffer with lookup tuples and sort; also we don't need a
secondary handler object.
*/
h->mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
h->mrr_funcs= *seq_funcs;
dsmrr_fill_buffer_cpk();
if (dsmrr_eof)
buf->end_of_used_area= mrr_buf_last;
DBUG_RETURN(0); /* nothing could go wrong while filling the buffer */
}
/* /*
There can be two cases: There can be two cases:
...@@ -365,84 +390,68 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, ...@@ -365,84 +390,68 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
The caller might have called h->index_init(), need to switch h to The caller might have called h->index_init(), need to switch h to
rnd_pos calls. rnd_pos calls.
*/ */
//psergey2-todo: don't create secondary for CPK scan. if (!h2)
if (!doing_cpk_scan)
{ {
if (!h2) /* Create a separate handler object to do rndpos() calls. */
THD *thd= current_thd;
/*
::clone() takes up a lot of stack, especially on 64 bit platforms.
The constant 5 is an empiric result.
*/
if (check_stack_overrun(thd, 5*STACK_MIN_SIZE, (uchar*) &new_h2))
DBUG_RETURN(1);
DBUG_ASSERT(h->active_index != MAX_KEY);
uint mrr_keyno= h->active_index;
/* Create a separate handler object to do rndpos() calls. */
if (!(new_h2= h->clone(thd->mem_root)) ||
new_h2->ha_external_lock(thd, F_RDLCK))
{ {
/* Create a separate handler object to do rndpos() calls. */ delete new_h2;
THD *thd= current_thd; DBUG_RETURN(1);
/* }
::clone() takes up a lot of stack, especially on 64 bit platforms.
The constant 5 is an empiric result.
*/
if (check_stack_overrun(thd, 5*STACK_MIN_SIZE, (uchar*) &new_h2))
DBUG_RETURN(1);
DBUG_ASSERT(h->active_index != MAX_KEY);
uint mrr_keyno= h->active_index;
/* Create a separate handler object to do rndpos() calls. */
if (!(new_h2= h->clone(thd->mem_root)) ||
new_h2->ha_external_lock(thd, F_RDLCK))
{
delete new_h2;
DBUG_RETURN(1);
}
if (mrr_keyno == h->pushed_idx_cond_keyno)
pushed_cond= h->pushed_idx_cond;
/*
Caution: this call will invoke this->dsmrr_close(). Do not put the
created secondary table handler into this->h2 or it will delete it.
*/
if (h->ha_index_end())
{
h2=new_h2;
goto error;
}
h2= new_h2; /* Ok, now can put it into h2 */ if (mrr_keyno == h->pushed_idx_cond_keyno)
table->prepare_for_position(); pushed_cond= h->pushed_idx_cond;
h2->extra(HA_EXTRA_KEYREAD);
if (h2->ha_index_init(mrr_keyno, FALSE))
goto error;
use_default_impl= FALSE; /*
if (pushed_cond) Caution: this call will invoke this->dsmrr_close(). Do not put the
h2->idx_cond_push(mrr_keyno, pushed_cond); created secondary table handler into this->h2 or it will delete it.
} */
else if (h->ha_index_end())
{ {
/* h2=new_h2;
We get here when the access alternates betwen MRR scan(s) and non-MRR goto error;
scans.
Calling h->index_end() will invoke dsmrr_close() for this object,
which will delete h2. We need to keep it, so save put it away and dont
let it be deleted:
*/
handler *save_h2= h2;
h2= NULL;
int res= (h->inited == handler::INDEX && h->ha_index_end());
h2= save_h2;
use_default_impl= FALSE;
if (res)
goto error;
} }
h2= new_h2; /* Ok, now can put it into h2 */
table->prepare_for_position();
h2->extra(HA_EXTRA_KEYREAD);
if (h2->ha_index_init(mrr_keyno, FALSE))
goto error;
use_default_impl= FALSE;
if (pushed_cond)
h2->idx_cond_push(mrr_keyno, pushed_cond);
} }
else else
{ {
//doing DS-MRR/CPK /*
// fill-buffer-analog We get here when the access alternates between MRR scan(s) and non-MRR
// eof scans.
h->mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
h->mrr_funcs= *seq_funcs; Calling h->index_end() will invoke dsmrr_close() for this object,
dsmrr_fill_buffer_cpk(); which will delete h2. We need to keep it, so put it away and don't
if (dsmrr_eof) let it be deleted:
buf->end_of_used_area= rowids_buf_last; */
DBUG_RETURN(0); // nothing can go wrong while filling the buffer handler *save_h2= h2;
h2= NULL;
int res= (h->inited == handler::INDEX && h->ha_index_end());
h2= save_h2;
use_default_impl= FALSE;
if (res)
goto error;
} }
if (h2->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges, if (h2->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges,
...@@ -456,7 +465,7 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, ...@@ -456,7 +465,7 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
adjust *buf to indicate that the remaining buffer space will not be used. adjust *buf to indicate that the remaining buffer space will not be used.
*/ */
if (dsmrr_eof) if (dsmrr_eof)
buf->end_of_used_area= rowids_buf_last; buf->end_of_used_area= mrr_buf_last;
/* /*
h->inited == INDEX may occur when 'range checked for each record' is h->inited == INDEX may occur when 'range checked for each record' is
...@@ -512,6 +521,9 @@ static int rowid_cmp(void *h, uchar *a, uchar *b) ...@@ -512,6 +521,9 @@ static int rowid_cmp(void *h, uchar *a, uchar *b)
rowid and return. rowid and return.
The function assumes that rowids buffer is empty when it is invoked. The function assumes that rowids buffer is empty when it is invoked.
dsmrr_eof is set to indicate whether we've exhausted the list of ranges we're
scanning.
@param h Table handler @param h Table handler
...@@ -526,8 +538,8 @@ int DsMrr_impl::dsmrr_fill_buffer() ...@@ -526,8 +538,8 @@ int DsMrr_impl::dsmrr_fill_buffer()
int res; int res;
DBUG_ENTER("DsMrr_impl::dsmrr_fill_buffer"); DBUG_ENTER("DsMrr_impl::dsmrr_fill_buffer");
rowids_buf_cur= rowids_buf; mrr_buf_cur= mrr_buf;
while ((rowids_buf_cur < rowids_buf_end) && while ((mrr_buf_cur < mrr_buf_end) &&
!(res= h2->handler::multi_range_read_next(&range_info))) !(res= h2->handler::multi_range_read_next(&range_info)))
{ {
KEY_MULTI_RANGE *curr_range= &h2->handler::mrr_cur_range; KEY_MULTI_RANGE *curr_range= &h2->handler::mrr_cur_range;
...@@ -537,13 +549,13 @@ int DsMrr_impl::dsmrr_fill_buffer() ...@@ -537,13 +549,13 @@ int DsMrr_impl::dsmrr_fill_buffer()
/* Put rowid, or {rowid, range_id} pair into the buffer */ /* Put rowid, or {rowid, range_id} pair into the buffer */
h2->position(table->record[0]); h2->position(table->record[0]);
memcpy(rowids_buf_cur, h2->ref, h2->ref_length); memcpy(mrr_buf_cur, h2->ref, h2->ref_length);
rowids_buf_cur += h2->ref_length; mrr_buf_cur += h2->ref_length;
if (is_mrr_assoc) if (is_mrr_assoc)
{ {
memcpy(rowids_buf_cur, &range_info, sizeof(void*)); memcpy(mrr_buf_cur, &range_info, sizeof(void*));
rowids_buf_cur += sizeof(void*); mrr_buf_cur += sizeof(void*);
} }
} }
...@@ -553,27 +565,29 @@ int DsMrr_impl::dsmrr_fill_buffer() ...@@ -553,27 +565,29 @@ int DsMrr_impl::dsmrr_fill_buffer()
/* Sort the buffer contents by rowid */ /* Sort the buffer contents by rowid */
uint elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*); uint elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
uint n_rowids= (rowids_buf_cur - rowids_buf) / elem_size; uint n_rowids= (mrr_buf_cur - mrr_buf) / elem_size;
my_qsort2(rowids_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp, my_qsort2(mrr_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp,
(void*)h); (void*)h);
rowids_buf_last= rowids_buf_cur; mrr_buf_last= mrr_buf_cur;
rowids_buf_cur= rowids_buf; mrr_buf_cur= mrr_buf;
DBUG_RETURN(0); DBUG_RETURN(0);
} }
/* qsort-compatible function to compare key tuples */ /*
my_qsort2-compatible function to compare key tuples
*/
int DsMrr_impl::key_tuple_cmp(void* arg, uchar* key1, uchar* key2) int DsMrr_impl::key_tuple_cmp(void* arg, uchar* key1, uchar* key2)
{ {
DsMrr_impl *dsmrr= (DsMrr_impl*)arg; DsMrr_impl *dsmrr= (DsMrr_impl*)arg;
TABLE *table= dsmrr->h->table; TABLE *table= dsmrr->h->table;
KEY_PART_INFO *part= table->key_info[table->s->primary_key].key_part; KEY_PART_INFO *part= table->key_info[table->s->primary_key].key_part;
KEY_PART_INFO *part_end= part + dsmrr->cpk_n_parts; uchar *key1_end= key1 + dsmrr->cpk_tuple_length;
//uint32 *lengths=item->field_lengths; while (key1 < key1_end)
for (; part < part_end; ++part)
{ {
Field* f = part->field; Field* f = part->field;
int len = part->store_length; int len = part->store_length;
...@@ -582,33 +596,43 @@ int DsMrr_impl::key_tuple_cmp(void* arg, uchar* key1, uchar* key2) ...@@ -582,33 +596,43 @@ int DsMrr_impl::key_tuple_cmp(void* arg, uchar* key1, uchar* key2)
return res; return res;
key1 += len; key1 += len;
key2 += len; key2 += len;
part++;
} }
return 0; return 0;
} }
//psergey2: /*
int DsMrr_impl::dsmrr_fill_buffer_cpk() DS-MRR/CPK: Fill the buffer with (lookup_tuple, range_id) pairs and sort
DESCRIPTION
DS-MRR/CPK: Fill the buffer with (lookup_tuple, range_id) pairs and sort
dsmrr_eof is set to indicate whether we've exhausted the list of ranges
we're scanning.
*/
void DsMrr_impl::dsmrr_fill_buffer_cpk()
{ {
int res; int res;
KEY_MULTI_RANGE cur_range; KEY_MULTI_RANGE cur_range;
DBUG_ENTER("DsMrr_impl::dsmrr_fill_buffer_cpk"); DBUG_ENTER("DsMrr_impl::dsmrr_fill_buffer_cpk");
rowids_buf_cur= rowids_buf; mrr_buf_cur= mrr_buf;
while ((rowids_buf_cur < rowids_buf_end) && while ((mrr_buf_cur < mrr_buf_end) &&
!(res= h->mrr_funcs.next(h->mrr_iter, &cur_range))) !(res= h->mrr_funcs.next(h->mrr_iter, &cur_range)))
{ {
DBUG_ASSERT(cur_range.range_flag & EQ_RANGE); DBUG_ASSERT(cur_range.range_flag & EQ_RANGE);
DBUG_ASSERT(cpk_tuple_length == cur_range.start_key.length); DBUG_ASSERT(cpk_tuple_length == cur_range.start_key.length);
/* Put key, or {key, range_id} pair into the buffer */ /* Put key, or {key, range_id} pair into the buffer */
memcpy(rowids_buf_cur, cur_range.start_key.key, cpk_tuple_length); memcpy(mrr_buf_cur, cur_range.start_key.key, cpk_tuple_length);
rowids_buf_cur += cpk_tuple_length; mrr_buf_cur += cpk_tuple_length;
if (is_mrr_assoc) if (is_mrr_assoc)
{ {
memcpy(rowids_buf_cur, &cur_range.ptr, sizeof(void*)); memcpy(mrr_buf_cur, &cur_range.ptr, sizeof(void*));
rowids_buf_cur += sizeof(void*); mrr_buf_cur += sizeof(void*);
} }
} }
...@@ -616,77 +640,82 @@ int DsMrr_impl::dsmrr_fill_buffer_cpk() ...@@ -616,77 +640,82 @@ int DsMrr_impl::dsmrr_fill_buffer_cpk()
/* Sort the buffer contents by lookup key tuple */ /* Sort the buffer contents by lookup key tuple */
uint elem_size= cpk_tuple_length + (int)is_mrr_assoc * sizeof(void*); uint elem_size= cpk_tuple_length + (int)is_mrr_assoc * sizeof(void*);
uint n_rowids= (rowids_buf_cur - rowids_buf) / elem_size; uint n_rowids= (mrr_buf_cur - mrr_buf) / elem_size;
my_qsort2(rowids_buf, n_rowids, elem_size, my_qsort2(mrr_buf, n_rowids, elem_size,
(qsort2_cmp)DsMrr_impl::key_tuple_cmp, (void*)this); (qsort2_cmp)DsMrr_impl::key_tuple_cmp, (void*)this);
rowids_buf_last= rowids_buf_cur; mrr_buf_last= mrr_buf_cur;
rowids_buf_cur= rowids_buf; mrr_buf_cur= mrr_buf;
DBUG_RETURN(0); DBUG_VOID_RETURN;
} }
/* /*
CPK: so, the source is DS-MRR/CPK: multi_range_read_next() function
- buffer exhaustion/re-fill
- advance to next range on "record-not-found" error. SYNOPSIS
- if scanning on a prefix, enumerate all records for a key. DsMrr_impl::dsmrr_next_cpk()
DESCRIPTION
DS-MRR/CPK: multi_range_read_next() function.
This is similar to DsMrr_impl::dsmrr_next(), the differences are that
- we get records with index_read(), not with rnd_pos()
- we may get multiple records for one key (=element of the buffer)
- unlike dsmrr_fill_buffer(), dsmrr_fill_buffer_cpk() never fails.
RETURN
0 OK, next record was successfully read
HA_ERR_END_OF_FILE End of records
Other Some other error
*/ */
int DsMrr_impl::dsmrr_next_cpk(char **range_info) int DsMrr_impl::dsmrr_next_cpk(char **range_info)
{ {
int res; int res;
if (cpk_have_range) if (cpk_have_range)
{ {
res= h->index_next_same(table->record[0], rowids_buf_cur, cpk_tuple_length); res= h->index_next_same(table->record[0], mrr_buf_cur, cpk_tuple_length);
if (res != HA_ERR_END_OF_FILE) if (res != HA_ERR_END_OF_FILE)
{ {
// todo
if (is_mrr_assoc) if (is_mrr_assoc)
memcpy(range_info, &cpk_saved_range_info, sizeof(void*)); memcpy(range_info, &cpk_saved_range_info, sizeof(void*));
return res; return res;
} }
/* /* No more records in this range. Fall through to get to another range */
Ok, we got EOF for records in this range. Fall through to get to another
range.
*/
} }
do do
{ {
/* First, make sure we have a range at start of the buffer*/ /* First, make sure we have a range at start of the buffer */
if (rowids_buf_cur == rowids_buf_last) if (mrr_buf_cur == mrr_buf_last)
{ {
if (dsmrr_eof) if (dsmrr_eof)
{ {
res= HA_ERR_END_OF_FILE; res= HA_ERR_END_OF_FILE;
goto end; goto end;
} }
// TODO: the return values are mix of HA_ERR_ codes and TRUE as "generic dsmrr_fill_buffer_cpk();
// failure" error. Is this ok?
if ((res= dsmrr_fill_buffer_cpk()))
goto end;
} }
if (mrr_buf_cur == mrr_buf_last)
if (rowids_buf_cur == rowids_buf_last)
{ {
res= HA_ERR_END_OF_FILE; res= HA_ERR_END_OF_FILE;
goto end; goto end;
} }
//TODO: make skip_index_tuple() calls, too? //psergey2-todo: make skip_index_tuple() calls, too?
//TODO: skip-record calls here? //psergey2-todo: skip-record calls here?
//if (h2->mrr_funcs.skip_record && //if (h2->mrr_funcs.skip_record &&
// h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) cur_range_info, rowid)) // h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) cur_range_info, rowid))
// continue; // continue;
/* Ok, got the range. Try making a lookup. */ /* Ok, got the range. Try making a lookup. */
uchar *lookup_tuple= rowids_buf_cur; uchar *lookup_tuple= mrr_buf_cur;
rowids_buf_cur += cpk_tuple_length; mrr_buf_cur += cpk_tuple_length;
if (is_mrr_assoc) if (is_mrr_assoc)
{ {
memcpy(cpk_saved_range_info, rowids_buf_cur, sizeof(void*)); memcpy(cpk_saved_range_info, mrr_buf_cur, sizeof(void*));
rowids_buf_cur += sizeof(void*) * test(is_mrr_assoc); mrr_buf_cur += sizeof(void*) * test(is_mrr_assoc);
} }
res= h->index_read(table->record[0], lookup_tuple, cpk_tuple_length, res= h->index_read(table->record[0], lookup_tuple, cpk_tuple_length,
...@@ -698,6 +727,10 @@ int DsMrr_impl::dsmrr_next_cpk(char **range_info) ...@@ -698,6 +727,10 @@ int DsMrr_impl::dsmrr_next_cpk(char **range_info)
if (!res) if (!res)
{ {
memcpy(range_info, cpk_saved_range_info, sizeof(void*)); memcpy(range_info, cpk_saved_range_info, sizeof(void*));
/*
Attempt reading more rows from this range only if there actually can
be multiple matches:
*/
cpk_have_range= !cpk_is_unique_scan; cpk_have_range= !cpk_is_unique_scan;
break; break;
} }
...@@ -707,6 +740,7 @@ int DsMrr_impl::dsmrr_next_cpk(char **range_info) ...@@ -707,6 +740,7 @@ int DsMrr_impl::dsmrr_next_cpk(char **range_info)
return res; return res;
} }
/** /**
DS-MRR implementation: multi_range_read_next() function DS-MRR implementation: multi_range_read_next() function
*/ */
...@@ -725,7 +759,7 @@ int DsMrr_impl::dsmrr_next(char **range_info) ...@@ -725,7 +759,7 @@ int DsMrr_impl::dsmrr_next(char **range_info)
do do
{ {
if (rowids_buf_cur == rowids_buf_last) if (mrr_buf_cur == mrr_buf_last)
{ {
if (dsmrr_eof) if (dsmrr_eof)
{ {
...@@ -738,17 +772,17 @@ int DsMrr_impl::dsmrr_next(char **range_info) ...@@ -738,17 +772,17 @@ int DsMrr_impl::dsmrr_next(char **range_info)
} }
/* return eof if there are no rowids in the buffer after re-fill attempt */ /* return eof if there are no rowids in the buffer after re-fill attempt */
if (rowids_buf_cur == rowids_buf_last) if (mrr_buf_cur == mrr_buf_last)
{ {
res= HA_ERR_END_OF_FILE; res= HA_ERR_END_OF_FILE;
goto end; goto end;
} }
rowid= rowids_buf_cur; rowid= mrr_buf_cur;
if (is_mrr_assoc) if (is_mrr_assoc)
memcpy(&cur_range_info, rowids_buf_cur + h->ref_length, sizeof(uchar**)); memcpy(&cur_range_info, mrr_buf_cur + h->ref_length, sizeof(uchar**));
rowids_buf_cur += h->ref_length + sizeof(void*) * test(is_mrr_assoc); mrr_buf_cur += h->ref_length + sizeof(void*) * test(is_mrr_assoc);
if (h2->mrr_funcs.skip_record && if (h2->mrr_funcs.skip_record &&
h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) cur_range_info, rowid)) h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) cur_range_info, rowid))
continue; continue;
...@@ -870,7 +904,33 @@ bool key_uses_partial_cols(TABLE *table, uint keyno) ...@@ -870,7 +904,33 @@ bool key_uses_partial_cols(TABLE *table, uint keyno)
return FALSE; return FALSE;
} }
/**
/*
Check if key/flags allow DS-MRR/CPK strategy to be used
SYNOPSIS
DsMrr_impl::check_cpk_scan()
keyno Index that will be used
mrr_flags MRR mode flags (HA_MRR_* bits)
DESCRIPTION
Check if key/flags allow DS-MRR/CPK strategy to be used.
RETURN
TRUE DS-MRR/CPK should be used
FALSE Otherwise
*/
bool DsMrr_impl::check_cpk_scan(uint keyno, uint mrr_flags)
{
  /* Only single-point scans with no sorted-output requirement qualify */
  if (!(mrr_flags & HA_MRR_SINGLE_POINT) || (mrr_flags & HA_MRR_SORTED))
    return FALSE;

  /* The index being scanned must be the table's primary key ... */
  if (keyno != table->s->primary_key)
    return FALSE;

  /* ... and the handler must report that primary key as clustered */
  return test(h->primary_key_is_clustered());
}
/*
DS-MRR Internals: Choose between Default MRR implementation and DS-MRR DS-MRR Internals: Choose between Default MRR implementation and DS-MRR
Make the choice between using Default MRR implementation and DS-MRR. Make the choice between using Default MRR implementation and DS-MRR.
...@@ -892,13 +952,7 @@ bool key_uses_partial_cols(TABLE *table, uint keyno) ...@@ -892,13 +952,7 @@ bool key_uses_partial_cols(TABLE *table, uint keyno)
@retval TRUE Default MRR implementation should be used @retval TRUE Default MRR implementation should be used
@retval FALSE DS-MRR implementation should be used @retval FALSE DS-MRR implementation should be used
*/ */
bool DsMrr_impl::check_cpk_scan(uint keyno, uint mrr_flags)
{
return test((mrr_flags & HA_MRR_SINGLE_POINT) &&
!(mrr_flags & HA_MRR_SORTED) &&
keyno == table->s->primary_key &&
h->primary_key_is_clustered());
}
bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
uint *bufsz, COST_VECT *cost) uint *bufsz, COST_VECT *cost)
...@@ -906,9 +960,8 @@ bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, ...@@ -906,9 +960,8 @@ bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
COST_VECT dsmrr_cost; COST_VECT dsmrr_cost;
bool res; bool res;
THD *thd= current_thd; THD *thd= current_thd;
//psergey2: check the criteria.
doing_cpk_scan= check_cpk_scan(keyno, *flags);
doing_cpk_scan= check_cpk_scan(keyno, *flags);
if (thd->variables.optimizer_use_mrr == 2 || *flags & HA_MRR_INDEX_ONLY || if (thd->variables.optimizer_use_mrr == 2 || *flags & HA_MRR_INDEX_ONLY ||
(keyno == table->s->primary_key && h->primary_key_is_clustered() && (keyno == table->s->primary_key && h->primary_key_is_clustered() &&
!doing_cpk_scan) || !doing_cpk_scan) ||
......
/* /*
This file contains declarations for This file contains declarations for Disk-Sweep MultiRangeRead (DS-MRR)
- Disk-Sweep MultiRangeRead (DS-MRR) implementation implementation
*/ */
/** /**
A Disk-Sweep MRR interface implementation A Disk-Sweep implementation of MRR Interface (DS-MRR for short)
This implementation makes range (and, in the future, 'ref') scans to read This is a "plugin"(*) for storage engines that allows index scans to
table rows in disk sweeps. read table rows in rowid order. For disk-based storage engines, this is
faster than reading table rows in whatever-SQL-layer-makes-calls-in order.
Currently it is used by MyISAM and InnoDB. Potentially it can be used with
any table handler that has non-clustered indexes and on-disk rows. (*) - only conceptually. No dynamic loading or binary compatibility of any
kind.
General scheme of things:
SQL Layer code
| | |
-v---v---v---- handler->multi_range_read_XXX() function calls
| | |
____________________________________
/ DS-MRR module \
| (scan indexes, order rowids, do |
| full record reads in rowid order) |
\____________________________________/
| | |
-|---|---|----- handler->read_range_first()/read_range_next(),
| | | handler->index_read(), handler->rnd_pos() calls.
| | |
v v v
Storage engine internals
Currently DS-MRR is used by MyISAM, InnoDB/XtraDB and Maria storage engines.
Potentially it can be used with any table handler that has disk-based data
storage and has better performance when reading data in rowid order.
*/
/*
DS-MRR implementation for one table. Create/use one object of this class for
each ha_{myisam/innobase/etc} object. That object will be further referred to
as "the handler"
There are actually three strategies
S1. Bypass DS-MRR, pass all calls to default implementation (i.e. to
MRR-to-non-MRR calls converter)
S2. Regular DS-MRR
S3. DS-MRR/CPK for doing scans on clustered primary keys.
S1 is used for cases which DS-MRR is unable to handle for some reason.
S2 is the actual DS-MRR. The basic algorithm is as follows:
1. Scan the index (and only index, that is, with HA_EXTRA_KEYREAD on) and
fill the buffer with {rowid, range_id} pairs
2. Sort the buffer by rowid
3. for each {rowid, range_id} pair in the buffer
get record by rowid and return the {record, range_id} pair
4. Repeat the above steps until we've exhausted the list of ranges we're
scanning.
S3 is the variant of DS-MRR for use with clustered primary keys (or any
clustered index). The idea is that in clustered index it is sufficient to
access the index in index order, and we don't need an intermediate step to
get rowid (like step #1 in S2).
DS-MRR/CPK's basic algorithm is as follows:
1. Collect a number of ranges (=lookup keys)
2. Sort them so that they follow in index order.
3. for each {lookup_key, range_id} pair in the buffer
get record(s) matching the lookup key and return {record, range_id} pairs
4. Repeat the above steps until we've exhausted the list of ranges we're
scanning.
*/ */
class DsMrr_impl class DsMrr_impl
...@@ -21,21 +81,39 @@ class DsMrr_impl ...@@ -21,21 +81,39 @@ class DsMrr_impl
DsMrr_impl() DsMrr_impl()
: h2(NULL) {}; : h2(NULL) {};
void init(handler *h_arg, TABLE *table_arg)
{
h= h_arg;
table= table_arg;
}
int dsmrr_init(handler *h, RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
uint n_ranges, uint key_parts, uint mode,
HANDLER_BUFFER *buf);
void dsmrr_close();
int dsmrr_next(char **range_info);
ha_rows dsmrr_info(uint keyno, uint n_ranges, uint keys, uint key_parts,
uint *bufsz, uint *flags, COST_VECT *cost);
ha_rows dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
void *seq_init_param, uint n_ranges, uint *bufsz,
uint *flags, COST_VECT *cost);
private:
/* /*
The "owner" handler object (the one that calls dsmrr_XXX functions). The "owner" handler object (the one that calls dsmrr_XXX functions).
It is used to retrieve full table rows by calling rnd_pos(). It is used to retrieve full table rows by calling rnd_pos().
*/ */
handler *h; handler *h;
TABLE *table; /* Always equal to h->table */ TABLE *table; /* Always equal to h->table */
private:
/* Secondary handler object. It is used for scanning the index */ /* Secondary handler object. It is used for scanning the index */
handler *h2; handler *h2;
/* Buffer to store rowids, or (rowid, range_id) pairs */ /* Buffer to store rowids, or (rowid, range_id) pairs */
uchar *rowids_buf; uchar *mrr_buf;
uchar *rowids_buf_cur; /* Current position when reading/writing */ uchar *mrr_buf_cur; /* Current position when reading/writing */
uchar *rowids_buf_last; /* When reading: end of used buffer space */ uchar *mrr_buf_last; /* When reading: end of used buffer space */
uchar *rowids_buf_end; /* End of the buffer */ uchar *mrr_buf_end; /* End of the buffer */
bool dsmrr_eof; /* TRUE <=> We have reached EOF when reading index tuples */ bool dsmrr_eof; /* TRUE <=> We have reached EOF when reading index tuples */
...@@ -44,41 +122,30 @@ class DsMrr_impl ...@@ -44,41 +122,30 @@ class DsMrr_impl
bool use_default_impl; /* TRUE <=> shortcut all calls to default MRR impl */ bool use_default_impl; /* TRUE <=> shortcut all calls to default MRR impl */
bool doing_cpk_scan; bool doing_cpk_scan; /* TRUE <=> DS-MRR/CPK variant is used */
/** DS-MRR/CPK variables start */
/* Length of lookup tuple being used, in bytes */
uint cpk_tuple_length; uint cpk_tuple_length;
uint cpk_n_parts; /*
TRUE <=> We're scanning on a full primary key (and not on prefix), and so
can get max. one match for each key
*/
bool cpk_is_unique_scan; bool cpk_is_unique_scan;
char *cpk_saved_range_info; /* TRUE <=> we're in the middle of enumerating records from a range */
bool cpk_have_range; bool cpk_have_range;
/* Valid if cpk_have_range==TRUE: range_id of the range we're enumerating */
char *cpk_saved_range_info;
bool check_cpk_scan(uint keyno, uint mrr_flags);
static int key_tuple_cmp(void* arg, uchar* key1, uchar* key2);
public:
void init(handler *h_arg, TABLE *table_arg)
{
h= h_arg;
table= table_arg;
}
int dsmrr_init(handler *h, RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
uint n_ranges, uint key_parts, uint mode,
HANDLER_BUFFER *buf);
void dsmrr_close();
int dsmrr_fill_buffer();
int dsmrr_fill_buffer_cpk();
int dsmrr_next(char **range_info);
int dsmrr_next_cpk(char **range_info);
ha_rows dsmrr_info(uint keyno, uint n_ranges, uint keys, uint key_parts,
uint *bufsz, uint *flags, COST_VECT *cost);
ha_rows dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
void *seq_init_param, uint n_ranges, uint *bufsz,
uint *flags, COST_VECT *cost);
private:
bool choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, uint *bufsz, bool choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, uint *bufsz,
COST_VECT *cost); COST_VECT *cost);
bool get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags, bool get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
uint *buffer_size, COST_VECT *cost); uint *buffer_size, COST_VECT *cost);
bool check_cpk_scan(uint keyno, uint mrr_flags);
static int key_tuple_cmp(void* arg, uchar* key1, uchar* key2);
int dsmrr_fill_buffer();
void dsmrr_fill_buffer_cpk();
int dsmrr_next_cpk(char **range_info);
}; };
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment