Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
MariaDB
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexedi
MariaDB
Commits
e25db795
Commit
e25db795
authored
Aug 08, 2010
by
Sergey Petrunya
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
DS-MRR, key-ordered retrievals: commit for buildbot
parent
7391175d
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
581 additions
and
255 deletions
+581
-255
sql/multi_range_read.cc
sql/multi_range_read.cc
+444
-243
sql/multi_range_read.h
sql/multi_range_read.h
+137
-12
No files found.
sql/multi_range_read.cc
View file @
e25db795
...
...
@@ -287,6 +287,84 @@ int handler::multi_range_read_next(char **range_info)
* DS-MRR implementation
***************************************************************************/
void
SimpleBuffer
::
write
(
const
uchar
*
data
,
size_t
bytes
)
{
DBUG_ASSERT
(
have_space_for
(
bytes
));
if
(
direction
==
-
1
)
write_pos
-=
bytes
;
memcpy
(
write_pos
,
data
,
bytes
);
if
(
direction
==
1
)
write_pos
+=
bytes
;
}
bool
SimpleBuffer
::
have_space_for
(
size_t
bytes
)
{
if
(
direction
==
1
)
return
(
write_pos
+
bytes
<
end
);
else
return
(
write_pos
-
bytes
>=
start
);
}
size_t
SimpleBuffer
::
used_size
()
{
return
(
direction
==
1
)
?
write_pos
-
read_pos
:
read_pos
-
write_pos
;
}
uchar
*
SimpleBuffer
::
read
(
size_t
bytes
)
{
DBUG_ASSERT
(
have_data
(
bytes
));
uchar
*
res
;
if
(
direction
==
1
)
{
res
=
read_pos
;
read_pos
+=
bytes
;
return
res
;
}
else
{
read_pos
=
read_pos
-
bytes
;
return
read_pos
;
}
}
bool
SimpleBuffer
::
have_data
(
size_t
bytes
)
{
return
(
direction
==
1
)
?
(
write_pos
-
read_pos
>=
(
ptrdiff_t
)
bytes
)
:
(
read_pos
-
write_pos
>=
(
ptrdiff_t
)
bytes
);
}
void
SimpleBuffer
::
reset_for_writing
()
{
if
(
direction
==
1
)
write_pos
=
read_pos
=
start
;
else
write_pos
=
read_pos
=
end
;
}
void
SimpleBuffer
::
reset_for_reading
()
{
/*
Do we need this at all?
if (direction == 1)
pos= start;
else
pos= end;
//end?
*/
}
uchar
*
SimpleBuffer
::
end_of_space
()
{
if
(
direction
==
1
)
return
start
;
else
return
end
;
//TODO: check this.
}
/**
DS-MRR: Initialize and start MRR scan
...
...
@@ -309,7 +387,6 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
void
*
seq_init_param
,
uint
n_ranges
,
uint
mode
,
HANDLER_BUFFER
*
buf
)
{
uint
elem_size
;
Item
*
pushed_cond
=
NULL
;
handler
*
new_h2
=
0
;
THD
*
thd
=
current_thd
;
...
...
@@ -330,69 +407,69 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
}
use_default_impl
=
FALSE
;
is_mrr_assoc
=
!
test
(
mode
&
HA_MRR_NO_ASSOCIATION
);
// psergey2: split the buffer:
/*
psergey2-note: we can't split the buffer here because we don't know how key
length. we'll only be able to do it when we've got the first range.
Figure out what steps we'll need to do
*/
do_sort_keys
=
FALSE
;
if
((
mode
&
HA_MRR_SINGLE_POINT
)
&&
optimizer_flag
(
thd
,
OPTIMIZER_SWITCH_MRR_SORT_KEYS
))
{
//do_sort_keys= TRUE; // will use key buffer to sort keys
;
do_sort_keys
=
TRUE
;
use_key_pointers
=
test
(
mode
&
HA_MRR_MATERIALIZED_KEYS
);
}
/*
do_rowid_fetch
=
FALSE
;
if (!doing_cpk_scan && !index_only_read)
doing_cpk_scan
=
check_cpk_scan
(
h
->
active_index
,
mode
);
if
(
!
doing_cpk_scan
/* && !index_only_read */
)
{
do_rowid_fetch= TRUE; //will use rowid buffer to store/sort rowids, etc
/* Will use rowid buffer to store/sort rowids, etc */
do_rowid_fetch
=
TRUE
;
}
DBUG_ASSERT
(
do_sort_keys
||
do_rowid_fetch
);
if (do_sort_keys && do_rowid_fetch)
{
split buffer space proportionally
}
else
{
// give all space to one buffer
if (do_sort_keys)
{
//sort_buffer_start= ...;
}
else
{
DBUG_ASSERT(do_rowid_fetch);
//rowid_buffer_start= ...;
}
}
full_buf
=
buf
->
buffer
;
full_buf_end
=
buf
->
buffer_end
;
/*
At start, alloc all of the buffer for rowids. Key sorting code will grab a
piece if necessary.
*/
mrr_buf
=
buf
->
buffer
;
mrr_buf_end
=
buf
->
buffer_end
;
rowid_buffer
.
set_buffer_space
(
full_buf
,
full_buf_end
,
1
);
if
(
is_mrr_assoc
)
status_var_increment
(
table
->
in_use
->
status_var
.
ha_multi_range_read_init_count
);
if
((
doing_cpk_scan
=
check_cpk_scan
(
h
->
active_index
,
mode
)))
/*
psergey2-todo: for CPK scans:
- use MRR irrespectively of @@mrr_sort_keys setting,
- dont do rowid retrieval.
*/
if
(
do_sort_keys
)
{
/* It's a DS-MRR/CPK scan */
cpk_tuple_length
=
0
;
/* dummy value telling it needs to be inited */
cpk_have_range
=
FALSE
;
key_tuple_length
=
0
;
/* dummy value telling it needs to be inited */
key_buff_elem_size
=
0
;
in_index_range
=
FALSE
;
h
->
mrr_iter
=
seq_funcs
->
init
(
seq_init_param
,
n_ranges
,
mode
);
h
->
mrr_funcs
=
*
seq_funcs
;
keyno
=
h
->
active_index
!=
MAX_KEY
?
h
->
active_index
:
h2
->
active_index
;
dsmrr_fill_key_buffer
();
if
(
dsmrr_eof
)
buf
->
end_of_used_area
=
mrr_buf_last
;
DBUG_RETURN
(
0
);
/* nothing could go wrong while filling the buffer */
if
(
dsmrr_eof
&&
!
do_rowid_fetch
)
buf
->
end_of_used_area
=
key_buffer
.
end_of_space
();
}
/* In regular DS-MRR, buffer stores {rowid, range_id} pairs */
elem_size
=
h
->
ref_length
+
(
int
)
is_mrr_assoc
*
sizeof
(
void
*
);
mrr_buf_last
=
mrr_buf
+
((
mrr_buf_end
-
mrr_buf
)
/
elem_size
)
*
elem_size
;
mrr_buf_end
=
mrr_buf_last
;
if
(
!
do_rowid_fetch
)
{
/*
We have the keys and won't need to fetch rowids, as key lookup will be
the last operation, done in multi_range_read_next().
*/
DBUG_RETURN
(
0
);
}
rowid_buff_elem_size
=
h
->
ref_length
+
(
is_mrr_assoc
?
sizeof
(
char
*
)
:
0
);
/*
psergey2: this is only needed when
- doing a rowid-to-row scan
...
...
@@ -416,7 +493,7 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
if
(
check_stack_overrun
(
thd
,
5
*
STACK_MIN_SIZE
,
(
uchar
*
)
&
new_h2
))
DBUG_RETURN
(
1
);
DBUG_ASSERT
(
h
->
active_index
!=
MAX_KEY
);
uint
mrr_
keyno
=
h
->
active_index
;
keyno
=
h
->
active_index
;
/* Create a separate handler object to do rnd_pos() calls. */
if
(
!
(
new_h2
=
h
->
clone
(
thd
->
mem_root
))
||
...
...
@@ -426,7 +503,7 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
DBUG_RETURN
(
1
);
}
if
(
mrr_
keyno
==
h
->
pushed_idx_cond_keyno
)
if
(
keyno
==
h
->
pushed_idx_cond_keyno
)
pushed_cond
=
h
->
pushed_idx_cond
;
/*
...
...
@@ -443,12 +520,14 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
h2
=
new_h2
;
/* Ok, now can put it into h2 */
table
->
prepare_for_position
();
h2
->
extra
(
HA_EXTRA_KEYREAD
);
if
(
h2
->
ha_index_init
(
mrr_keyno
,
FALSE
))
h2
->
mrr_funcs
=
*
seq_funcs
;
//psergey3-todo: sort out where to store
h2
->
mrr_iter
=
h
->
mrr_iter
;
if
(
h2
->
ha_index_init
(
keyno
,
FALSE
))
goto
error
;
if
(
pushed_cond
)
h2
->
idx_cond_push
(
mrr_
keyno
,
pushed_cond
);
h2
->
idx_cond_push
(
keyno
,
pushed_cond
);
}
else
{
...
...
@@ -467,10 +546,15 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
if
(
res
)
goto
error
;
}
if
(
!
do_sort_keys
&&
h2
->
handler
::
multi_range_read_init
(
seq_funcs
,
seq_init_param
,
n_ranges
,
mode
,
buf
))
{
goto
error
;
}
if
(
h2
->
handler
::
multi_range_read_init
(
seq_funcs
,
seq_init_param
,
n_ranges
,
mode
,
buf
)
||
dsmrr_fill_rowid_buffer
())
if
(
dsmrr_fill_rowid_buffer
())
{
goto
error
;
}
...
...
@@ -479,7 +563,7 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
adjust *buf to indicate that the remaining buffer space will not be used.
*/
if
(
dsmrr_eof
)
buf
->
end_of_used_area
=
mrr_buf_last
;
buf
->
end_of_used_area
=
rowid_buffer
.
end_of_space
()
;
/*
h->inited == INDEX may occur when 'range checked for each record' is
...
...
@@ -526,21 +610,24 @@ static int rowid_cmp(void *h, uchar *a, uchar *b)
/**
DS-MRR: Fill
the buffer with rowids and sort it by rowid
DS-MRR: Fill
and sort the rowid buffer
{This is an internal function of DiskSweep MRR implementation}
Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into
buffer. When the buffer is full or scan is completed, sort the buffer by
rowid and return.
The function assumes that rowids buffer is empty when it is invoked.
New2:
we will need to scan either
- the source sequence getting records
- use dsmrr_next_from_index..
dsmrr_eof is set to indicate whether we've exhausted the list of ranges we're
scanning.
psergey2: this func will 'fill the rowid buffer'. If filling the rowid buffer
requires that key buffer is filled/sorted first, will do that, too.
@param h Table handler
@retval 0 OK, the next portion of rowids is in the buffer,
...
...
@@ -554,40 +641,32 @@ int DsMrr_impl::dsmrr_fill_rowid_buffer()
int
res
;
DBUG_ENTER
(
"DsMrr_impl::dsmrr_fill_rowid_buffer"
);
mrr_buf_cur
=
mrr_buf
;
mrr_buf_next_identical
=
mrr_buf_cur
;
/*
psergey2-todo:
- call here fill/sort key buffer, if needed.
rowid_buffer
.
reset_for_writing
();
identical_rowid_ptr
=
NULL
;
psergey2-todo: then, get keys either from
- multi_range_read_next()
- sorted key buffer
psergey2-todo: if we're traversing an ordered key sequence,
check if next keys are the same as previous.
(note that it's easy as ordered sequence allows forward/backward
navigation so we don't need to buffer things)
*/
while
((
mrr_buf_cur
<
mrr_buf_end
)
&&
!
(
res
=
h2
->
handler
::
multi_range_read_next
(
&
range_info
)))
while
(
rowid_buffer
.
have_space_for
(
rowid_buff_elem_size
))
{
if
(
do_sort_keys
)
res
=
dsmrr_next_from_index
(
&
range_info
);
else
res
=
h2
->
handler
::
multi_range_read_next
(
&
range_info
);
if
(
res
)
break
;
KEY_MULTI_RANGE
*
curr_range
=
&
h2
->
handler
::
mrr_cur_range
;
if
(
h2
->
mrr_funcs
.
skip_index_tuple
&&
if
(
!
do_sort_keys
&&
/* If keys are sorted then this check is already done */
h2
->
mrr_funcs
.
skip_index_tuple
&&
h2
->
mrr_funcs
.
skip_index_tuple
(
h2
->
mrr_iter
,
curr_range
->
ptr
))
continue
;
/* Put rowid, or {rowid, range_id} pair into the buffer */
h2
->
position
(
table
->
record
[
0
]);
memcpy
(
mrr_buf_cur
,
h2
->
ref
,
h2
->
ref_length
);
mrr_buf_cur
+=
h2
->
ref_length
;
rowid_buffer
.
write
(
h2
->
ref
,
h2
->
ref_length
);
if
(
is_mrr_assoc
)
{
memcpy
(
mrr_buf_cur
,
&
range_info
,
sizeof
(
void
*
));
mrr_buf_cur
+=
sizeof
(
void
*
);
}
rowid_buffer
.
write
((
uchar
*
)
&
range_info
,
sizeof
(
void
*
));
}
if
(
res
&&
res
!=
HA_ERR_END_OF_FILE
)
...
...
@@ -596,12 +675,11 @@ int DsMrr_impl::dsmrr_fill_rowid_buffer()
/* Sort the buffer contents by rowid */
uint
elem_size
=
h
->
ref_length
+
(
int
)
is_mrr_assoc
*
sizeof
(
void
*
);
uint
n_rowids
=
(
mrr_buf_cur
-
mrr_buf
)
/
elem_size
;
uint
n_rowids
=
rowid_buffer
.
used_size
(
)
/
elem_size
;
my_qsort2
(
mrr_buf
,
n_rowids
,
elem_size
,
(
qsort2_cmp
)
rowid_cmp
,
(
void
*
)
h
);
mrr_buf_last
=
mrr_buf_cur
;
mrr_buf_cur
=
mrr_buf
;
my_qsort2
(
rowid_buffer
.
used_area
(),
n_rowids
,
elem_size
,
(
qsort2_cmp
)
rowid_cmp
,
(
void
*
)
h
);
DBUG_RETURN
(
0
);
}
...
...
@@ -616,8 +694,8 @@ int DsMrr_impl::key_tuple_cmp(void* arg, uchar* key1, uchar* key2)
{
DsMrr_impl
*
dsmrr
=
(
DsMrr_impl
*
)
arg
;
TABLE
*
table
=
dsmrr
->
h
->
table
;
KEY_PART_INFO
*
part
=
table
->
key_info
[
table
->
s
->
primary_key
].
key_part
;
int
res
;
KEY_PART_INFO
*
part
=
table
->
key_info
[
dsmrr
->
keyno
].
key_part
;
if
(
dsmrr
->
use_key_pointers
)
{
...
...
@@ -626,15 +704,31 @@ int DsMrr_impl::key_tuple_cmp(void* arg, uchar* key1, uchar* key2)
key2
=
*
((
uchar
**
)
key2
);
// todo is this alignment-safe?
}
uchar
*
key1_end
=
key1
+
dsmrr
->
cpk
_tuple_length
;
uchar
*
key1_end
=
key1
+
dsmrr
->
key
_tuple_length
;
while
(
key1
<
key1_end
)
{
Field
*
f
=
part
->
field
;
int
len
=
part
->
store_length
;
int
res
=
f
->
cmp
(
key1
,
key2
);
if
(
res
)
if
(
part
->
null_bit
)
{
if
(
*
key1
)
// key1 == NULL
{
if
(
!*
key2
)
// key1(NULL) < key2(notNULL)
return
-
1
;
goto
equals
;
}
else
if
(
*
key2
)
// key1(notNULL) > key2 (NULL)
return
1
;
// Step over NULL byte for f->cmp().
key1
++
;
key2
++
;
len
--
;
}
if
((
res
=
f
->
key_cmp
(
key1
,
key2
)))
return
res
;
equals:
key1
+=
len
;
key2
+=
len
;
part
++
;
...
...
@@ -643,6 +737,68 @@ int DsMrr_impl::key_tuple_cmp(void* arg, uchar* key1, uchar* key2)
}
/*
Setup key/rowid buffer sizes based on sample_key
DESCRIPTION
Setup key/rowid buffer sizes based on sample_key and its length.
This function must be called when all buffer space is empty.
*/
void
DsMrr_impl
::
setup_buffer_sizes
(
key_range
*
sample_key
)
{
key_tuple_length
=
sample_key
->
length
;
key_tuple_map
=
sample_key
->
keypart_map
;
key_size_in_keybuf
=
use_key_pointers
?
sizeof
(
char
*
)
:
key_tuple_length
;
key_buff_elem_size
=
key_size_in_keybuf
+
(
int
)
is_mrr_assoc
*
sizeof
(
void
*
);
uint
rowid_buf_elem_size
=
h
->
ref_length
+
(
int
)
is_mrr_assoc
*
sizeof
(
char
*
);
KEY
*
key_info
=
&
h
->
table
->
key_info
[
keyno
];
/*
Use rec_per_key statistics as a basis to find out how many rowids
we'll get for each key value.
TODO: are we guaranteed to get r_p_c==1 for unique keys?
TODO: what should be the default value to use when there is no
statistics?
*/
uint
parts
=
my_count_bits
(
key_tuple_map
);
ulong
rpc
;
if
((
rpc
=
key_info
->
rec_per_key
[
parts
-
1
]))
{
rowid_buf_elem_size
*=
rpc
;
}
double
fraction_for_rowids
=
((
double
)
rowid_buf_elem_size
/
((
double
)
rowid_buf_elem_size
+
key_buff_elem_size
));
uint
bytes_for_rowids
=
round
(
fraction_for_rowids
*
(
full_buf_end
-
full_buf
));
uint
bytes_for_keys
=
(
full_buf_end
-
full_buf
)
-
bytes_for_rowids
;
if
(
bytes_for_keys
<
key_buff_elem_size
+
1
)
{
uint
add
=
key_buff_elem_size
+
1
-
bytes_for_keys
;
bytes_for_rowids
-=
add
;
DBUG_ASSERT
(
bytes_for_rowids
>=
(
h
->
ref_length
+
(
int
)
is_mrr_assoc
*
sizeof
(
char
*
)
+
1
));
}
rowid_buffer
.
set_buffer_space
(
full_buf
,
full_buf
+
bytes_for_rowids
,
1
);
key_buffer
.
set_buffer_space
(
full_buf
+
bytes_for_rowids
,
full_buf_end
,
1
);
index_ranges_unique
=
test
(
key_info
->
flags
&
HA_NOSAME
&&
key_info
->
key_parts
==
my_count_bits
(
sample_key
->
keypart_map
));
}
/*
DS-MRR/CPK: Fill the buffer with (lookup_tuple, range_id) pairs and sort
...
...
@@ -650,69 +806,55 @@ int DsMrr_impl::key_tuple_cmp(void* arg, uchar* key1, uchar* key2)
DsMrr_impl::dsmrr_fill_key_buffer()
DESCRIPTION
DS-MRR/CPK: Fill the buffer with (lookup_tuple, range_id) pairs and sort
DS-MRR/CPK: Enumerate the input range (=key) sequence, fill the key buffer
(lookup_key, range_id) pairs and sort.
dsmrr_eof is set to indicate whether we've exhausted the list of ranges
we're scanning.
psergey2-q: can this be used for filling/sorting key buffer in general case?
a: yes.
qq: can we push sequence iteration init down into here?
*/
void
DsMrr_impl
::
dsmrr_fill_key_buffer
()
{
//psergey2: here, no identicals detection is necessary since we always scan
// the unordered sequence.
int
res
;
KEY_MULTI_RANGE
cur_range
;
DBUG_ENTER
(
"DsMrr_impl::dsmrr_fill_key_buffer"
);
mrr_buf_cur
=
mrr_buf
;
while
((
mrr_buf_cur
<
mrr_buf_end
)
&&
// reset the buffer for writing.
key_buffer
.
reset_for_writing
();
while
((
key_buffer
.
have_space_for
(
key_buff_elem_size
))
&&
!
(
res
=
h
->
mrr_funcs
.
next
(
h
->
mrr_iter
,
&
cur_range
)))
{
DBUG_ASSERT
(
cur_range
.
range_flag
&
EQ_RANGE
);
if
(
!
cpk
_tuple_length
)
if
(
!
key
_tuple_length
)
{
cpk_tuple_length
=
cur_range
.
start_key
.
length
;
key_buf_element_size
=
use_key_pointers
?
sizeof
(
char
*
)
:
cpk_tuple_length
;
cpk_is_unique_scan
=
test
(
table
->
key_info
[
h
->
active_index
].
key_parts
==
my_count_bits
(
cur_range
.
start_key
.
keypart_map
));
uint
elem_size
=
key_buf_element_size
+
(
int
)
is_mrr_assoc
*
sizeof
(
void
*
);
mrr_buf_last
=
mrr_buf
+
((
mrr_buf_end
-
mrr_buf
)
/
elem_size
)
*
elem_size
;
mrr_buf_end
=
mrr_buf_last
;
/* This only happens when we've just started filling the buffer */
DBUG_ASSERT
(
key_buffer
.
used_size
()
==
0
);
setup_buffer_sizes
(
&
cur_range
.
start_key
);
}
//psergey2: if keys are materialized, store pointers, not copy keys
/* Put key, or {key, range_id} pair into the buffer */
if
(
use_key_pointers
)
memcpy
(
mrr_buf_cur
,
&
cur_range
.
start_key
.
key
,
sizeof
(
char
*
));
key_buffer
.
write
((
uchar
*
)
&
cur_range
.
start_key
.
key
,
sizeof
(
char
*
));
else
memcpy
(
mrr_buf_cur
,
cur_range
.
start_key
.
key
,
cpk_tuple_length
);
mrr_buf_cur
+=
key_buf_element_size
;
key_buffer
.
write
(
cur_range
.
start_key
.
key
,
key_tuple_length
);
if
(
is_mrr_assoc
)
{
memcpy
(
mrr_buf_cur
,
&
cur_range
.
ptr
,
sizeof
(
void
*
));
mrr_buf_cur
+=
sizeof
(
void
*
);
}
key_buffer
.
write
((
uchar
*
)
&
cur_range
.
ptr
,
sizeof
(
void
*
));
}
dsmrr_eof
=
test
(
res
);
/* Sort the buffer contents by rowid */
uint
elem_size
=
key_buf_element_size
+
(
int
)
is_mrr_assoc
*
sizeof
(
void
*
);
uint
n_
rowids
=
(
mrr_buf_cur
-
mrr_buf
)
/
elem_size
;
uint
key_elem_size
=
key_size_in_keybuf
+
(
int
)
is_mrr_assoc
*
sizeof
(
void
*
);
uint
n_
keys
=
key_buffer
.
used_size
()
/
key_
elem_size
;
my_qsort2
(
mrr_buf
,
n_rowids
,
elem_size
,
my_qsort2
(
key_buffer
.
used_area
(),
n_keys
,
key_elem_size
,
(
qsort2_cmp
)
DsMrr_impl
::
key_tuple_cmp
,
(
void
*
)
this
);
mrr_buf_last
=
mrr_buf_cur
;
mrr_buf_cur
=
mrr_buf
;
last_identical_key_ptr
=
NULL
;
in_identical_keys_range
=
FALSE
;
DBUG_VOID_RETURN
;
}
...
...
@@ -721,60 +863,105 @@ void DsMrr_impl::dsmrr_fill_key_buffer()
DS-MRR/CPK: multi_range_read_next() function
DESCRIPTION
DsMrr_impl::dsmrr_next_
cpk
()
DsMrr_impl::dsmrr_next_
from_index
()
range_info OUT identifier of range that the returned record belongs to
DESCRIPTION
DS-MRR/CPK: multi_range_read_next() function.
This is similar to DsMrr_impl::dsmrr_next(), the differences are that
- we get records with index_read(), not with rnd_pos()
- we may get multiple records for one key (=element of the buffer)
- unlike dsmrr_fill_rowid_buffer(), dsmrr_fill_key_buffer() never fails.
This function walks over key buffer and does index reads, i.e. it produces
{current_record, range_id} pairs.
The function has the same call contract like multi_range_read_next()'s.
We actually iterate nested sequences:
- a disjoint sequence of index ranges
- each range has multiple records
- each record goes into multiple identical ranges.
RETURN
0 OK, next record was successfully read
HA_ERR_END_OF_FILE End of records
Other Some other error
psergey2-todo: this should detect identical keys.
*/
int
DsMrr_impl
::
dsmrr_next_
cpk
(
char
**
range_info
)
int
DsMrr_impl
::
dsmrr_next_
from_index
(
char
**
range_info_arg
)
{
int
res
;
uchar
*
key_in_buf
;
handler
*
file
=
do_rowid_fetch
?
h2
:
h
;
while
(
cpk_have
_range
)
while
(
in_identical_keys
_range
)
{
//read_and_check:
/* Read record/key pointer from the buffer */
key_in_buf
=
identical_key_it
.
get_next
(
key_size_in_keybuf
);
if
(
is_mrr_assoc
)
cur_range_info
=
(
char
*
)
identical_key_it
.
get_next
(
sizeof
(
void
*
));
if
(
h
->
mrr_funcs
.
skip_record
&&
h
->
mrr_funcs
.
skip_record
(
h
->
mrr_iter
,
cpk_saved_range_info
,
NULL
))
if
(
key_in_buf
==
last_identical_key_ptr
)
{
cpk_have_range
=
FALSE
;
break
;
/* We're looking at the last of the identical keys */
in_identical_keys_range
=
FALSE
;
}
uchar
*
lookup_tuple
=
use_key_pointers
?
(
*
((
uchar
**
)
mrr_buf_cur
))
:
mrr_buf_cur
;
res
=
h
->
index_next_same
(
table
->
record
[
0
],
lookup_tuple
,
cpk_tuple_length
);
if
(
h
->
mrr_funcs
.
skip_index_tuple
&&
h
->
mrr_funcs
.
skip_index_tuple
(
h
->
mrr_iter
,
cpk_saved_range_info
))
check_record:
if
((
h
->
mrr_funcs
.
skip_index_tuple
&&
h
->
mrr_funcs
.
skip_index_tuple
(
h
->
mrr_iter
,
*
(
char
**
)
cur_range_info
))
||
(
h
->
mrr_funcs
.
skip_record
&&
h
->
mrr_funcs
.
skip_record
(
h
->
mrr_iter
,
*
(
char
**
)
cur_range_info
,
NULL
)))
{
continue
;
}
memcpy
(
range_info_arg
,
cur_range_info
,
sizeof
(
void
*
));
return
0
;
}
/* Try returrning next record from the current range */
while
(
in_index_range
)
{
res
=
file
->
ha_index_next_same
(
table
->
record
[
0
],
cur_index_tuple
,
key_tuple_length
);
if
(
res
)
{
if
(
res
!=
HA_ERR_END_OF_FILE
&&
res
!=
HA_ERR_KEY_NOT_FOUND
)
return
res
;
/* Fatal error */
if
(
res
!=
HA_ERR_END_OF_FILE
)
in_index_range
=
FALSE
;
/* no more records here */
break
;
}
if
(
last_identical_key_ptr
)
{
i
f
(
is_mrr_assoc
)
memcpy
(
range_info
,
&
cpk_saved_range_info
,
sizeof
(
void
*
)
);
return
res
;
i
n_identical_keys_range
=
TRUE
;
identical_key_it
.
init
(
&
key_buffer
);
cur_range_info
=
first_identical_range_info
;
}
/* No more records in this range. Exit this loop and go get another range */
cpk_have_range
=
FALSE
;
goto
check_record
;
// goto read_and_check
;
}
do
while
(
1
)
{
DBUG_ASSERT
(
!
in_identical_keys_range
&&
!
in_index_range
);
/* Jump over the keys that were handled by identical key processing */
if
(
last_identical_key_ptr
)
{
while
(
key_buffer
.
read
(
key_size_in_keybuf
)
!=
last_identical_key_ptr
)
{
if
(
is_mrr_assoc
)
key_buffer
.
read
(
sizeof
(
void
*
));
}
if
(
is_mrr_assoc
)
key_buffer
.
read
(
sizeof
(
void
*
));
last_identical_key_ptr
=
NULL
;
}
/* First, make sure we have a range at start of the buffer */
if
(
mrr_buf_cur
==
mrr_buf_last
)
if
(
!
key_buffer
.
have_data
(
key_buff_elem_size
)
)
{
if
(
dsmrr_eof
)
{
...
...
@@ -782,58 +969,56 @@ int DsMrr_impl::dsmrr_next_cpk(char **range_info)
goto
end
;
}
dsmrr_fill_key_buffer
();
}
if
(
mrr_buf_cur
==
mrr_buf_last
)
{
res
=
HA_ERR_END_OF_FILE
;
goto
end
;
if
(
!
key_buffer
.
have_data
(
key_buff_elem_size
))
{
res
=
HA_ERR_END_OF_FILE
;
goto
end
;
}
}
/* Ok, got the range. Try making a lookup. */
uchar
*
lookup_tuple
=
use_key_pointers
?
(
*
((
uchar
**
)
mrr_buf_cur
))
:
mrr_buf_cur
;
mrr_buf_cur
+=
key_buf_element_size
;
/* Get the next range to scan*/
cur_index_tuple
=
key_in_buf
=
key_buffer
.
read
(
key_size_in_keybuf
);
if
(
use_key_pointers
)
cur_index_tuple
=
*
((
uchar
**
)
cur_index_tuple
);
if
(
is_mrr_assoc
)
cur_range_info
=
(
char
*
)
key_buffer
.
read
(
sizeof
(
void
*
));
/* Do index lookup */
if
((
res
=
file
->
ha_index_read_map
(
table
->
record
[
0
],
cur_index_tuple
,
key_tuple_map
,
HA_READ_KEY_EXACT
)))
{
memcpy
(
&
cpk_saved_range_info
,
mrr_buf_cur
,
sizeof
(
void
*
));
mrr_buf_cur
+=
sizeof
(
void
*
)
*
test
(
is_mrr_assoc
);
if
(
res
!=
HA_ERR_END_OF_FILE
&&
res
!=
HA_ERR_KEY_NOT_FOUND
)
return
res
;
continue
;
/* to next key and make another lookup */
}
if
(
h
->
mrr_funcs
.
skip_record
&&
h
->
mrr_funcs
.
skip_record
(
h
->
mrr_iter
,
cpk_saved_range_info
,
NULL
))
continue
;
res
=
h
->
index_read
(
table
->
record
[
0
],
lookup_tuple
,
cpk_tuple_length
,
HA_READ_KEY_EXACT
);
/*
Check pushed index condition. Performance-wise, it does not make any
sense to put this call here (the above call has already accessed the full
record). That's the best I could do, though, because:
- ha_innobase doesn't support IndexConditionPushdown on clustered PK
- MRR interface doesn't allow the storage engine to refuse a pushed index
condition.
Having this call here is not fully harmless: EXPLAIN shows "pushed index
condition", which is technically true but doesn't bring the benefits that
one might expect.
*/
if
(
h
->
mrr_funcs
.
skip_index_tuple
&&
h
->
mrr_funcs
.
skip_index_tuple
(
h
->
mrr_iter
,
cpk_saved_range_info
))
continue
;
/* Check if subsequent keys in the key buffer are the same as this one */
{
uchar
*
ptr
;
identical_key_it
.
init
(
&
key_buffer
);
last_identical_key_ptr
=
NULL
;
while
((
ptr
=
identical_key_it
.
get_next
(
key_size_in_keybuf
)))
{
if
(
is_mrr_assoc
)
identical_key_it
.
get_next
(
sizeof
(
void
*
));
if
(
res
&&
res
!=
HA_ERR_END_OF_FILE
)
goto
end
;
if
(
key_tuple_cmp
(
this
,
key_in_buf
,
ptr
)
)
break
;
if
(
!
res
)
{
memcpy
(
range_info
,
&
cpk_saved_range_info
,
sizeof
(
void
*
));
/*
Attempt reading more rows from this range only if there actually can
be multiple matches:
*/
cpk_have_range
=
!
cpk_is_unique_scan
;
break
;
last_identical_key_ptr
=
ptr
;
}
if
(
last_identical_key_ptr
)
{
in_identical_keys_range
=
TRUE
;
identical_key_it
.
init
(
&
key_buffer
);
first_identical_range_info
=
cur_range_info
;
}
}
}
while
(
true
);
in_index_range
=
!
index_ranges_unique
;
goto
check_record
;
}
end:
return
res
;
...
...
@@ -842,9 +1027,6 @@ int DsMrr_impl::dsmrr_next_cpk(char **range_info)
/**
DS-MRR implementation: multi_range_read_next() function
psergey2-todo: put identical rowid detection code here
it should always work because rowid sequences are always sorted
*/
int
DsMrr_impl
::
dsmrr_next
(
char
**
range_info
)
...
...
@@ -852,89 +1034,108 @@ int DsMrr_impl::dsmrr_next(char **range_info)
int
res
;
uchar
*
cur_range_info
=
0
;
uchar
*
rowid
;
uchar
*
range_id
;
if
(
use_default_impl
)
return
h
->
handler
::
multi_range_read_next
(
range_info
);
if
(
doing_cpk_scan
)
return
dsmrr_next_
cpk
(
range_info
);
if
(
!
do_rowid_fetch
)
return
dsmrr_next_
from_index
(
range_info
);
if
(
mrr_buf_next_identical
!=
mrr_buf_cu
r
)
while
(
identical_rowid_pt
r
)
{
/*
There are multiple rowids. Return the record again, now with different
range_id
Current record (the one we've returned in previous call) was obtained
from a rowid that matched multiple range_ids. Return this record again,
with next matching range_id.
*/
do
rowid
=
rowid_buffer
.
read
(
h
->
ref_length
);
if
(
is_mrr_assoc
)
{
if
(
is_mrr_assoc
)
memcpy
(
range_info
,
mrr_buf_next_identical
+
h
->
ref_length
,
sizeof
(
uchar
*
));
}
while
(
!
h2
->
mrr_funcs
.
skip_record
||
!
h2
->
mrr_funcs
.
skip_record
(
h2
->
mrr_iter
,
(
char
*
)
range_info
,
rowid
));
uchar
*
range_ptr
=
rowid_buffer
.
read
(
sizeof
(
uchar
*
));
memcpy
(
range_info
,
range_ptr
,
sizeof
(
uchar
*
));
}
mrr_buf_next_identical
+=
h
->
ref_length
+
sizeof
(
void
*
)
*
test
(
is_mrr_assoc
);
return
0
;
if
(
rowid
==
identical_rowid_ptr
)
{
identical_rowid_ptr
=
NULL
;
/* reached the last of identical rowids */
}
if
(
!
h2
->
mrr_funcs
.
skip_record
||
!
h2
->
mrr_funcs
.
skip_record
(
h2
->
mrr_iter
,
(
char
*
)
*
range_info
,
rowid
))
{
return
0
;
}
}
do
while
(
1
)
{
if
(
mrr_buf_cur
==
mrr_buf_last
)
if
(
!
rowid_buffer
.
have_data
(
1
)
)
{
if
(
dsmrr_eof
)
{
res
=
HA_ERR_END_OF_FILE
;
goto
end
;
}
res
=
dsmrr_fill_rowid_buffer
();
if
(
res
)
goto
end
;
return
HA_ERR_END_OF_FILE
;
if
(
do_sort_keys
&&
key_buffer
.
used_size
()
==
0
)
dsmrr_fill_key_buffer
();
if
(
(
res
=
dsmrr_fill_rowid_buffer
())
)
return
res
;
}
/* return eof if there are no rowids in the buffer after re-fill attempt */
if
(
mrr_buf_cur
==
mrr_buf_last
)
{
res
=
HA_ERR_END_OF_FILE
;
goto
end
;
}
rowid
=
mrr_buf_cur
;
/* Return eof if there are no rowids in the buffer after re-fill attempt */
if
(
!
rowid_buffer
.
have_data
(
1
))
return
HA_ERR_END_OF_FILE
;
rowid
=
rowid_buffer
.
read
(
h
->
ref_length
);
identical_rowid_ptr
=
NULL
;
if
(
is_mrr_assoc
)
memcpy
(
&
cur_range_info
,
mrr_buf_cur
+
h
->
ref_length
,
sizeof
(
uchar
**
));
{
range_id
=
rowid_buffer
.
read
(
sizeof
(
uchar
*
));
memcpy
(
&
cur_range_info
,
range_id
,
sizeof
(
uchar
*
));
memcpy
(
range_info
,
range_id
,
sizeof
(
uchar
*
));
}
size_t
element_size
=
h
->
ref_length
+
sizeof
(
void
*
)
*
test
(
is_mrr_assoc
);
mrr_buf_cur
+=
element_size
;
mrr_buf_next_identical
=
mrr_buf_cur
;
//psergey2-note: the below isn't right- we won't want to skip over this
// rowid because this (rowid, range_id) pair has nothing.. the next
// identical rowids might have something.. (but we set identicals later,
// dont we?)
if
(
h2
->
mrr_funcs
.
skip_record
&&
h2
->
mrr_funcs
.
skip_record
(
h2
->
mrr_iter
,
(
char
*
)
cur_range_info
,
rowid
))
continue
;
res
=
h
->
ha_rnd_pos
(
table
->
record
[
0
],
rowid
);
if
(
res
==
HA_ERR_RECORD_DELETED
)
continue
;
if
(
0
)
//(!res)
/*
Check if subsequent buffer elements have the same rowid value as this
one. If yes, remember this fact so that we don't make any more rnd_pos()
calls with this value.
*/
if
(
!
res
)
{
/*
Note: this implies that SQL layer doesn't touch table->record[0]
between calls.
*/
uchar
*
current_el
=
mrr_buf_cur
-
element_size
;
while
(
mrr_buf_cur
!=
mrr_buf_last
&&
!
h2
->
cmp_ref
(
current_el
,
mrr_buf_cur
))
uchar
*
ptr
;
SimpleBuffer
::
PeekIterator
identical_rowid_it
;
identical_rowid_it
.
init
(
&
rowid_buffer
);
while
((
ptr
=
identical_rowid_it
.
get_next
(
h
->
ref_length
)))
{
mrr_buf_cur
+=
element_size
;
if
(
is_mrr_assoc
)
identical_rowid_it
.
get_next
(
sizeof
(
void
*
));
if
(
h2
->
cmp_ref
(
rowid
,
ptr
))
break
;
identical_rowid_ptr
=
ptr
;
}
}
break
;
}
while
(
true
);
if
(
is_mrr_assoc
)
{
memcpy
(
range_info
,
rowid
+
h
->
ref_length
,
sizeof
(
void
*
));
return
0
;
}
end:
return
res
;
}
...
...
sql/multi_range_read.h
View file @
e25db795
...
...
@@ -37,6 +37,96 @@
*/
/*
A simple memory buffer for reading and writing.
when writing, there is no user-visible "current" position, although
internally 'pos' points to just after the end of used area (or at the
start of it for reverse buffer).
When reading, there is current position pointing at start (for reverse
buffer, end) of the element that will be read next.
^^ why end for reverse? it's more logical to point at start
One can peek at what's behind that element by using advance_ptr function.
TODO: will the reverse buffer store {tuple; rowid} or {rowid; tuple} pairs?
(why does it matter??? Read and write in the same order and then it
shouldn't matter.)
*/
class
SimpleBuffer
{
uchar
*
start
;
uchar
*
end
;
uchar
*
read_pos
;
uchar
*
write_pos
;
/*
1 <=> buffer grows/is filled/is read from start to end
-1 <=> everthing is done from end to start instead.
*/
int
direction
;
public:
/* Write-mode functions */
void
reset_for_writing
();
void
write
(
const
uchar
*
data
,
size_t
bytes
);
bool
have_space_for
(
size_t
bytes
);
uchar
*
used_area
()
{
return
(
direction
==
1
)
?
read_pos
:
write_pos
;
}
size_t
used_size
();
/* Read-mode functions */
void
reset_for_reading
();
uchar
*
read
(
size_t
bytes
);
bool
have_data
(
size_t
bytes
);
uchar
*
end_of_space
();
/* Control functions */
void
set_buffer_space
(
uchar
*
start_arg
,
uchar
*
end_arg
,
int
direction_arg
)
{
start
=
start_arg
;
end
=
end_arg
;
direction
=
direction_arg
;
reset_for_writing
();
}
friend
class
PeekIterator
;
class
PeekIterator
{
// if direction==1 : pointer to what to return next
// if direction==-1: pointer to the end of what is to be returned next
uchar
*
pos
;
SimpleBuffer
*
sb
;
public:
void
init
(
SimpleBuffer
*
sb_arg
)
{
sb
=
sb_arg
;
pos
=
sb
->
read_pos
;
}
/* Return pointer to next chunk of nbytes bytes and avance over it */
uchar
*
get_next
(
size_t
nbytes
)
{
if
(
sb
->
direction
==
1
)
{
if
(
pos
+
nbytes
>
sb
->
write_pos
)
return
NULL
;
uchar
*
res
=
pos
;
pos
+=
nbytes
;
return
res
;
}
else
{
if
(
pos
-
nbytes
<=
sb
->
write_pos
)
return
NULL
;
pos
-=
nbytes
;
return
pos
;
}
}
};
};
/*
DS-MRR implementation for one table. Create/use one object of this class for
each ha_{myisam/innobase/etc} object. That object will be further referred to
...
...
@@ -73,6 +163,8 @@
scanning.
*/
class
DsMrr_impl
{
public:
...
...
@@ -108,14 +200,27 @@ class DsMrr_impl
/* Secondary handler object. It is used for scanning the index */
handler
*
h2
;
uchar
*
full_buf
;
uchar
*
full_buf_end
;
/* Buffer to store rowids, or (rowid, range_id) pairs */
uchar
*
mrr_buf
;
uchar
*
mrr_buf_cur
;
/* Current position when reading/writing */
uchar
*
mrr_buf_last
;
/* When reading: end of used buffer space */
uchar
*
mrr_buf_end
;
/* End of the buffer */
SimpleBuffer
rowid_buffer
;
uchar
*
identical_rowid_ptr
;
/* Identical keys */
bool
in_identical_keys_range
;
uchar
*
last_identical_key_ptr
;
SimpleBuffer
::
PeekIterator
identical_key_it
;
uchar
*
mrr_buf_next_identical
;
SimpleBuffer
key_buffer
;
uint
keyno
;
/* Execution control */
bool
do_sort_keys
;
bool
use_key_pointers
;
bool
do_rowid_fetch
;
bool
dsmrr_eof
;
/* TRUE <=> We have reached EOF when reading index tuples */
...
...
@@ -129,18 +234,33 @@ class DsMrr_impl
/** DS-MRR/CPK variables start */
/* Length of lookup tuple being used, in bytes */
uint
cpk_tuple_length
;
uint
key_tuple_length
;
key_part_map
key_tuple_map
;
/*
This is
= key_tuple_length if we copy keys to buffer
= sizeof(void*) if we're using pointers to materialized keys.
*/
uint
key_size_in_keybuf
;
/* = key_size_in_keybuf [ + sizeof(range_assoc_info) ] */
uint
key_buff_elem_size
;
/* = h->ref_length [ + sizeof(range_assoc_info) ] */
uint
rowid_buff_elem_size
;
uint
key_buf_element_size
;
/*
TRUE <=> We're scanning on a full primary key (and not on prefix), and so
can get max. one match for each key
*/
bool
cpk_is_unique_scan
;
bool
index_ranges_unique
;
/* TRUE<=> we're in a middle of enumerating records from a range */
bool
cpk_have_range
;
/* Valid if cpk_have_range==TRUE: range_id of the range we're enumerating */
char
*
cpk_saved_range_info
;
bool
in_index_range
;
uchar
*
cur_index_tuple
;
/* if in_index_range==TRUE: range_id of the range we're enumerating */
char
*
cur_range_info
;
char
*
first_identical_range_info
;
bool
choose_mrr_impl
(
uint
keyno
,
ha_rows
rows
,
uint
*
flags
,
uint
*
bufsz
,
COST_VECT
*
cost
);
...
...
@@ -150,6 +270,11 @@ class DsMrr_impl
static
int
key_tuple_cmp
(
void
*
arg
,
uchar
*
key1
,
uchar
*
key2
);
int
dsmrr_fill_rowid_buffer
();
void
dsmrr_fill_key_buffer
();
int
dsmrr_next_cpk
(
char
**
range_info
);
int
dsmrr_next_from_index
(
char
**
range_info
);
void
setup_buffer_sizes
(
key_range
*
sample_key
);
static
range_seq_t
key_buf_seq_init
(
void
*
init_param
,
uint
n_ranges
,
uint
flags
);
static
uint
key_buf_seq_next
(
range_seq_t
rseq
,
KEY_MULTI_RANGE
*
range
);
};
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment