Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
MariaDB
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexedi
MariaDB
Commits
7dbdedcb
Commit
7dbdedcb
authored
Dec 18, 2003
by
unknown
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Precise read time estimates for index_merge/Unique
parent
211bc240
Changes
13
Show whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
604 additions
and
210 deletions
+604
-210
mysql-test/r/index_merge.result
mysql-test/r/index_merge.result
+1
-1
mysql-test/t/index_merge.test
mysql-test/t/index_merge.test
+1
-1
sql/filesort.cc
sql/filesort.cc
+4
-4
sql/ha_berkeley.h
sql/ha_berkeley.h
+1
-1
sql/ha_innodb.cc
sql/ha_innodb.cc
+2
-1
sql/ha_innodb.h
sql/ha_innodb.h
+1
-1
sql/handler.h
sql/handler.h
+2
-2
sql/mysql_priv.h
sql/mysql_priv.h
+20
-0
sql/opt_range.cc
sql/opt_range.cc
+337
-152
sql/opt_range.h
sql/opt_range.h
+51
-45
sql/records.cc
sql/records.cc
+0
-1
sql/sql_class.h
sql/sql_class.h
+2
-1
sql/uniques.cc
sql/uniques.cc
+182
-0
No files found.
mysql-test/r/index_merge.result
View file @
7dbdedcb
drop table if exists t0, t1, t2, t3;
drop table if exists t0, t1, t2, t3
,t4
;
create table t0
(
key1 int not null,
...
...
mysql-test/t/index_merge.test
View file @
7dbdedcb
...
...
@@ -3,7 +3,7 @@
#
--
disable_warnings
drop
table
if
exists
t0
,
t1
,
t2
,
t3
;
drop
table
if
exists
t0
,
t1
,
t2
,
t3
,
t4
;
--
enable_warnings
# Create and fill a table with simple keys
...
...
sql/filesort.cc
View file @
7dbdedcb
...
...
@@ -88,9 +88,9 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
#endif
FILESORT_INFO
table_sort
;
/*
d
on't use table->sort in filesort as it is also used by
QUICK_INDEX_MERGE_SELECT.
work with a copy of it and put it back at the
end
when index_merge select has finished with it.
D
on't use table->sort in filesort as it is also used by
QUICK_INDEX_MERGE_SELECT.
Work with a copy and put it back at the end
when index_merge select has finished with it.
*/
memcpy
(
&
table_sort
,
&
table
->
sort
,
sizeof
(
FILESORT_INFO
));
table
->
sort
.
io_cache
=
NULL
;
...
...
sql/ha_berkeley.h
View file @
7dbdedcb
...
...
@@ -167,7 +167,7 @@ class ha_berkeley: public handler
longlong
get_auto_increment
();
void
print_error
(
int
error
,
myf
errflag
);
uint8
table_cache_type
()
{
return
HA_CACHE_TBL_TRANSACT
;
}
bool
primary_key_is_clustered
_covering
()
{
return
true
;
}
bool
primary_key_is_clustered
()
{
return
true
;
}
};
extern
bool
berkeley_skip
,
berkeley_shared_data
;
...
...
sql/ha_innodb.cc
View file @
7dbdedcb
...
...
@@ -2003,7 +2003,8 @@ build_template(
update field->query_id so that the formula
thd->query_id == field->query_id did not work. */
ibool
index_contains_field
=
dict_index_contains_col_or_prefix
(
index
,
i
);
ibool
index_contains_field
=
dict_index_contains_col_or_prefix
(
index
,
i
);
if
(
templ_type
==
ROW_MYSQL_REC_FIELDS
&&
((
prebuilt
->
read_just_key
&&
!
index_contains_field
)
||
...
...
sql/ha_innodb.h
View file @
7dbdedcb
...
...
@@ -187,7 +187,7 @@ class ha_innobase: public handler
void
init_table_handle_for_HANDLER
();
longlong
get_auto_increment
();
uint8
table_cache_type
()
{
return
HA_CACHE_TBL_ASKTRANSACT
;
}
bool
primary_key_is_clustered
_covering
()
{
return
true
;
}
bool
primary_key_is_clustered
()
{
return
true
;
}
};
extern
bool
innodb_skip
;
...
...
sql/handler.h
View file @
7dbdedcb
...
...
@@ -378,10 +378,10 @@ class handler :public Sql_alloc
/*
RETURN
true
p
rimary key (if there is one) is clustered key covering all fields
true
P
rimary key (if there is one) is clustered key covering all fields
false otherwise
*/
virtual
bool
primary_key_is_clustered
_covering
()
{
return
false
;
}
virtual
bool
primary_key_is_clustered
()
{
return
false
;
}
};
/* Some extern variables used with handlers */
...
...
sql/mysql_priv.h
View file @
7dbdedcb
...
...
@@ -118,6 +118,26 @@ extern CHARSET_INFO *national_charset_info, *table_alias_charset;
*/
#define TIME_FOR_COMPARE 5 // 5 compares == one read
/*
Number of comparisons of table rowids equivalent to reading one row from a
table.
*/
#define TIME_FOR_COMPARE_ROWID (TIME_FOR_COMPARE*2)
/*
For sequential disk seeks the cost formula is:
DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST * #blocks_to_skip
The cost of average seek
DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*BLOCKS_IN_AVG_SEEK =1.0.
*/
#define DISK_SEEK_BASE_COST ((double)0.5)
#define BLOCKS_IN_AVG_SEEK 128
#define DISK_SEEK_PROP_COST ((double)0.5/BLOCKS_IN_AVG_SEEK)
/*
Number of rows in a reference table when refereed through a not unique key.
This value is only used when we don't know anything about the key
...
...
sql/opt_range.cc
View file @
7dbdedcb
...
...
@@ -307,12 +307,18 @@ static ha_rows check_quick_keys(PARAM *param,uint index,SEL_ARG *key_tree,
QUICK_RANGE_SELECT
*
get_quick_select
(
PARAM
*
param
,
uint
index
,
SEL_ARG
*
key_tree
,
MEM_ROOT
*
alloc
=
NULL
);
static
int
get_quick_select_params
(
SEL_TREE
*
tree
,
PARAM
&
param
,
key_map
&
needed_reg
,
TABLE
*
head
,
static
int
get_quick_select_params
(
SEL_TREE
*
tree
,
PARAM
*
param
,
key_map
&
needed_reg
,
bool
index_read_can_be_used
,
double
*
read_time
,
ha_rows
*
records
,
double
*
read_time
,
ha_rows
*
records
,
SEL_ARG
***
key_to_read
);
static
int
get_index_merge_params
(
PARAM
*
param
,
key_map
&
needed_reg
,
SEL_IMERGE
*
imerge
,
double
*
read_time
,
ha_rows
*
imerge_rows
);
inline
double
get_index_only_read_time
(
PARAM
*
param
,
ha_rows
records
,
int
keynr
);
#ifndef DBUG_OFF
static
void
print_quick_sel_imerge
(
QUICK_INDEX_MERGE_SELECT
*
quick
,
const
key_map
*
needed_reg
);
...
...
@@ -453,7 +459,7 @@ int SEL_IMERGE::or_sel_tree_with_checks(PARAM *param, SEL_TREE *new_tree)
}
}
/*
new tree cannot be combined with any of existing trees
*/
/*
New tree cannot be combined with any of existing trees.
*/
return
or_sel_tree
(
param
,
new_tree
);
}
...
...
@@ -483,7 +489,6 @@ int SEL_IMERGE::or_sel_imerge_with_checks(PARAM *param, SEL_IMERGE* imerge)
/*
Perform AND operation on two index_merge lists and store result in *im1.
*/
inline
void
imerge_list_and_list
(
List
<
SEL_IMERGE
>
*
im1
,
List
<
SEL_IMERGE
>
*
im2
)
...
...
@@ -503,18 +508,16 @@ inline void imerge_list_and_list(List<SEL_IMERGE> *im1, List<SEL_IMERGE> *im2)
i.e. all conjuncts except the first one are currently dropped.
This is done to avoid producing N*K ways to do index_merge.
If (a_1||b_1) produce a condition that is always true, NULL is
returned and index_merge is discarded. (while it is actuall
y
possible to try
harder).
If (a_1||b_1) produce a condition that is always true, NULL is
returned
and index_merge is discarded (while it is actually possible to tr
y
harder).
As a consequence of this, choice of keys to do index_merge
read may depend on the order of conditions in WHERE part of
the query.
As a consequence of this, choice of keys to do index_merge read may depend
on the order of conditions in WHERE part of the query.
RETURN
0 OK, result is stored in *im1
other Error, both passed lists are unusable
*/
int
imerge_list_or_list
(
PARAM
*
param
,
...
...
@@ -533,7 +536,7 @@ int imerge_list_or_list(PARAM *param,
Perform OR operation on index_merge list and key tree.
RETURN
0 OK, result is stored in *im1
0 OK, result is stored in *im1
.
other Error
*/
...
...
@@ -685,10 +688,10 @@ bool
QUICK_INDEX_MERGE_SELECT
::
push_quick_back
(
QUICK_RANGE_SELECT
*
quick_sel_range
)
{
/*
Save quick_select that does scan on clustered
covering primary key as
it will be processed separately
Save quick_select that does scan on clustered
primary key as it will be
processed separately.
*/
if
(
head
->
file
->
primary_key_is_clustered
_covering
()
&&
if
(
head
->
file
->
primary_key_is_clustered
()
&&
quick_sel_range
->
index
==
head
->
primary_key
)
pk_quick_select
=
quick_sel_range
;
else
...
...
@@ -1001,7 +1004,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
ha_rows
found_records
;
double
found_read_time
=
read_time
;
if
(
!
get_quick_select_params
(
tree
,
param
,
needed_reg
,
head
,
true
,
if
(
!
get_quick_select_params
(
tree
,
&
param
,
needed_reg
,
true
,
&
found_read_time
,
&
found_records
,
&
best_key
))
{
...
...
@@ -1021,120 +1024,57 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
}
/*
b
tw, tree type SEL_TREE::INDEX_MERGE was not introduced
intentionally
B
tw, tree type SEL_TREE::INDEX_MERGE was not introduced
intentionally
.
*/
/*
if no range select could be built, try using index_merge
*/
/*
If no range select could be built, try using index_merge.
*/
if
(
!
quick
&&
!
tree
->
merges
.
is_empty
())
{
DBUG_PRINT
(
"info"
,(
"No range reads possible,"
" trying to construct index_merge"
));
SEL_IMERGE
*
imerge
;
SEL_IMERGE
*
min_imerge
=
NULL
;
double
min_imerge_
cost
=
DBL_MAX
;
double
min_imerge_
read_time
;
ha_rows
min_imerge_records
;
List_iterator_fast
<
SEL_IMERGE
>
it
(
tree
->
merges
);
/* find index_merge with minimal cost */
while
((
imerge
=
it
++
))
{
bool
imerge_failed
=
false
;
double
imerge_cost
=
0
;
ha_rows
imerge_total_records
=
0
;
double
tree_read_time
;
ha_rows
tree_records
;
imerge
->
best_keys
=
(
SEL_ARG
***
)
alloc_root
(
&
alloc
,
(
imerge
->
trees_next
-
imerge
->
trees
)
*
sizeof
(
void
*
));
/*
It may be possible to use different keys for index_merge, e.g for
queries like
...WHERE (key1 < c2 AND key2 < c2) OR (key3 < c3 AND key4 < c4)
We assume we get the best index_merge if we choose the best key
read inside each of the conjuncts.
*/
for
(
SEL_TREE
**
ptree
=
imerge
->
trees
;
ptree
!=
imerge
->
trees_next
;
ptree
++
)
if
(
!
head
->
used_keys
.
is_clear_all
())
{
tree_read_time
=
read_time
;
if
(
get_quick_select_params
(
*
ptree
,
param
,
needed_reg
,
head
,
false
,
&
tree_read_time
,
&
tree_records
,
&
(
imerge
->
best_keys
[
ptree
-
imerge
->
trees
])))
imerge_failed
=
true
;
imerge_cost
+=
tree_read_time
;
imerge_total_records
+=
tree_records
;
int
key_for_use
=
find_shortest_key
(
head
,
&
head
->
used_keys
);
ha_rows
total_table_records
=
(
0
==
head
->
file
->
records
)
?
1
:
head
->
file
->
records
;
read_time
=
get_index_only_read_time
(
&
param
,
total_table_records
,
key_for_use
);
DBUG_PRINT
(
"info"
,
(
"'all' scan will be using key %d, read time %g"
,
key_for_use
,
read_time
));
}
if
(
!
imerge_failed
)
{
imerge_total_records
=
min
(
imerge_total_records
,
head
->
file
->
records
);
imerge_cost
+=
imerge_total_records
/
TIME_FOR_COMPARE
;
if
(
imerge_cost
<
min_imerge_cost
)
min_imerge_read_time
=
read_time
;
/*
Ok, read_time contains best 'all' read time.
Now look for index_merge with cost < read_time
*/
List_iterator_fast
<
SEL_IMERGE
>
it
(
tree
->
merges
);
while
((
imerge
=
it
++
))
{
if
(
!
get_index_merge_params
(
&
param
,
needed_reg
,
imerge
,
&
min_imerge_read_time
,
&
min_imerge_records
))
min_imerge
=
imerge
;
min_imerge_cost
=
imerge_cost
;
min_imerge_records
=
imerge_total_records
;
}
}
}
if
(
!
min_imerge
)
goto
end_free
;
records
=
min_imerge_records
;
/*
Ok, got minimal index merge, *min_imerge, with cost min_imerge_cost
Compare its cost with "all" scan cost (or "all+using index" if
it is possible) and choose the best.
*/
if
(
!
head
->
used_keys
.
is_clear_all
())
{
/* check if "ALL" +"using index" read would be faster */
int
key_for_use
=
find_shortest_key
(
head
,
&
head
->
used_keys
);
ha_rows
total_table_records
=
(
0
==
head
->
file
->
records
)
?
1
:
head
->
file
->
records
;
uint
keys_per_block
=
(
head
->
file
->
block_size
/
2
/
(
head
->
key_info
[
key_for_use
].
key_length
+
head
->
file
->
ref_length
)
+
1
);
double
all_index_scan_read_time
=
((
double
)(
total_table_records
+
keys_per_block
-
1
)
/
(
double
)
keys_per_block
);
DBUG_PRINT
(
"info"
,
(
"'all' scan will be using key %d, read time %g"
,
key_for_use
,
all_index_scan_read_time
));
if
(
all_index_scan_read_time
<
min_imerge_cost
)
{
DBUG_PRINT
(
"info"
,
(
"index merge would be slower, "
"will do full 'index' scan"
));
goto
end_free
;
}
}
else
{
/* check if "ALL" would be faster */
if
(
read_time
<
min_imerge_cost
)
{
DBUG_PRINT
(
"info"
,
(
"index merge would be slower, "
"will do full table scan"
));
goto
end_free
;
}
}
/* Ok, using index_merge is the best option, so construct it. */
if
(
!
(
quick
=
quick_imerge
=
new
QUICK_INDEX_MERGE_SELECT
(
thd
,
head
)))
goto
end_free
;
quick
->
records
=
min_imerge_records
;
quick
->
read_time
=
min_imerge_
cost
;
quick
->
read_time
=
min_imerge_
read_time
;
my_pthread_setspecific_ptr
(
THR_MALLOC
,
&
quick_imerge
->
alloc
);
...
...
@@ -1152,10 +1092,10 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
&
quick_imerge
->
alloc
)))
{
new_quick
->
records
=
min_imerge_records
;
new_quick
->
read_time
=
min_imerge_
cost
;
new_quick
->
read_time
=
min_imerge_
read_time
;
/*
QUICK_RANGE_SELECT::QUICK_RANGE_SELECT leaves THR_MALLOC
pointing to its allocator, restore it back
pointing to its allocator, restore it back
.
*/
quick_imerge
->
last_quick_select
=
new_quick
;
...
...
@@ -1213,16 +1153,266 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
}
/*
Calculate index merge cost and save parameters for its construction.
SYNOPSIS
get_index_merge_params()
param in parameter with structure.
needed_reg in/out needed_reg from this SQL_SELECT
imerge in index_merge description structure
read_time in/out in: cost of an existing way to read a table
out: cost of index merge
imerge_rows out pessimistic estimate of # of rows to be retrieved
RETURN
0 Cost of this index_merge is less than passed *read_time,
*imerge_rows and *read_time contain new index_merge parameters.
1 Cost of this index_merge is more than *read_time,
*imerge_rows and *read_time are not modified.
-1 error
NOTES
index_merge_cost =
cost(index_reads) + (see #1)
cost(rowid_to_row_scan) + (see #2)
cost(unique_use) (see #3)
1. cost(index_reads) =SUM_i(cost(index_read_i))
For non-CPK scans,
cost(index_read_i) = {cost of ordinary 'index only' scan}
For CPK scan,
cost(index_read_i) = {cost of non-'index only' scan}
2. cost(rowid_to_row_scan)
If table PK is clustered then
cost(rowid_to_row_scan) =
{cost of ordinary clustered PK scan with n_ranges=n_rows}
Otherwise, we use the following model to calculate costs:
We need to retrieve n_rows rows from file that occupies n_blocks blocks.
We assume that offsets of rows we need are independent variates with
uniform distribution in [0..max_file_offset] range.
We'll denote block as "busy" if it contains row(s) we need to retrieve
and "empty" if doesn't contain rows we need.
Probability that a block is empty is (1 - 1/n_blocks)^n_rows (this
applies to any block in file). Let x_i be a variate taking value 1 if
block #i is empty and 0 otherwise.
Then E(x_i) = (1 - 1/n_blocks)^n_rows;
E(n_empty_blocks) = E(sum(x_i)) = sum(E(x_i)) =
= n_blocks * ((1 - 1/n_blocks)^n_rows) =
~= n_blocks * exp(-n_rows/n_blocks).
E(n_busy_blocks) = n_blocks*(1 - (1 - 1/n_blocks)^n_rows) =
~= n_blocks * (1 - exp(-n_rows/n_blocks)).
Average size of "hole" between neighbor non-empty blocks is
E(hole_size) = n_blocks/E(n_busy_blocks).
The total cost of reading all needed blocks in one "sweep" is:
E(n_busy_blocks)*
(DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*n_blocks/E(n_busy_blocks)).
3. Cost of Unique use is calculated in Unique::get_use_cost function.
*/
static
int
get_index_merge_params
(
PARAM
*
param
,
key_map
&
needed_reg
,
SEL_IMERGE
*
imerge
,
double
*
read_time
,
ha_rows
*
imerge_rows
)
{
double
imerge_cost
=
0.0
;
/* cost of this index_merge */
bool
imerge_too_expensive
=
false
;
double
tree_read_time
;
ha_rows
tree_records
;
bool
pk_is_clustered
=
param
->
table
->
file
->
primary_key_is_clustered
();
bool
have_cpk_scan
;
ha_rows
records_for_unique
=
0
;
ha_rows
cpk_records
=
0
;
DBUG_ENTER
(
"get_index_merge_params"
);
/* allocate structs to save construction info */
imerge
->
best_keys
=
(
SEL_ARG
***
)
alloc_root
(
param
->
mem_root
,
(
imerge
->
trees_next
-
imerge
->
trees
)
*
sizeof
(
void
*
));
/*
PHASE 1: get the best keys to use for this index_merge
*/
/*
It may be possible to use different keys for index_merge scans,
e.g. for query like
...WHERE (key1 < c2 AND key2 < c2) OR (key3 < c3 AND key4 < c4)
we have to make choice between key1 and key2 for one scan and
between key3,key4 for another.
We assume we'll get the best way if we choose the best key read
inside each of the conjuncts. Comparison is done without 'using index'.
*/
for
(
SEL_TREE
**
ptree
=
imerge
->
trees
;
ptree
!=
imerge
->
trees_next
;
ptree
++
)
{
SEL_ARG
**
tree_best_key
;
uint
keynr
;
tree_read_time
=
*
read_time
;
if
(
get_quick_select_params
(
*
ptree
,
param
,
needed_reg
,
false
,
&
tree_read_time
,
&
tree_records
,
&
tree_best_key
))
{
/*
Non-'index only' range scan on a one in index_merge key is more
expensive than other available option. The entire index_merge will be
more expensive then, too. We continue here only to update SQL_SELECT
members.
*/
imerge_too_expensive
=
true
;
}
if
(
imerge_too_expensive
)
continue
;
imerge
->
best_keys
[
ptree
-
imerge
->
trees
]
=
tree_best_key
;
keynr
=
param
->
real_keynr
[(
tree_best_key
-
(
*
ptree
)
->
keys
)];
if
(
pk_is_clustered
&&
keynr
==
param
->
table
->
primary_key
)
{
/* This is a Clustered PK scan, it will be done without 'index only' */
imerge_cost
+=
tree_read_time
;
have_cpk_scan
=
true
;
cpk_records
=
tree_records
;
}
else
{
/* Non-CPK scan, calculate time to do it using 'index only' */
imerge_cost
+=
get_index_only_read_time
(
param
,
tree_records
,
keynr
);
records_for_unique
+=
tree_records
;
}
}
if
(
imerge_too_expensive
)
DBUG_RETURN
(
1
);
if
((
imerge_cost
>
*
read_time
)
||
((
records_for_unique
+
cpk_records
)
>=
param
->
table
->
file
->
records
)
&&
*
read_time
!=
DBL_MAX
)
{
/* Bail out if it is obvious that index_merge would be more expensive */
DBUG_RETURN
(
1
);
}
if
(
have_cpk_scan
)
{
/*
Add one ROWID comparison for each row retrieved on non-CPK scan.
(it is done in QUICK_RANGE_SELECT::row_in_ranges)
*/
imerge_cost
+=
records_for_unique
/
TIME_FOR_COMPARE_ROWID
;
}
/* PHASE 2: Calculate cost(rowid_to_row_scan) */
if
(
pk_is_clustered
)
{
imerge_cost
+=
param
->
table
->
file
->
read_time
(
param
->
table
->
primary_key
,
records_for_unique
,
records_for_unique
)
+
rows2double
(
records_for_unique
)
/
TIME_FOR_COMPARE
;
}
else
{
double
n_blocks
=
ceil
((
double
)(
longlong
)
param
->
table
->
file
->
data_file_length
/
IO_SIZE
);
/* Don't ceil the following as it is an estimate */
double
busy_blocks
=
n_blocks
*
(
1.0
-
pow
(
1.0
-
1.0
/
n_blocks
,
records_for_unique
));
JOIN
*
join
=
param
->
thd
->
lex
->
select_lex
.
join
;
if
(
!
join
||
join
->
tables
==
1
)
{
imerge_cost
+=
busy_blocks
*
(
DISK_SEEK_BASE_COST
+
DISK_SEEK_PROP_COST
*
n_blocks
/
busy_blocks
);
}
else
{
/*
It can be a join with source table being non-last table, so assume
that disk seeks are random here.
(TODO it is possible to determine if this *is* a last table in 'index
checked for each record'-type join)
*/
imerge_cost
+=
busy_blocks
;
}
}
/* PHASE 3: Add Unique operations cost */
imerge_cost
+=
Unique
::
get_use_cost
(
param
->
mem_root
,
records_for_unique
,
param
->
table
->
file
->
ref_length
,
param
->
thd
->
variables
.
sortbuff_size
);
if
(
imerge_cost
<
*
read_time
)
{
*
read_time
=
imerge_cost
;
records_for_unique
+=
cpk_records
;
*
imerge_rows
=
min
(
records_for_unique
,
param
->
table
->
file
->
records
);
DBUG_RETURN
(
0
);
}
DBUG_RETURN
(
1
);
}
/*
Calculate cost of 'index only' scan for given index and number of records.
(We can resolve this by only reading through this key.)
SYNOPSIS
get_whole_index_read_time()
param parameters structure
records #of records to read
keynr key to read
NOTES
It is assumed that we will read trough the whole key range and that all
key blocks are half full (normally things are much better).
*/
inline
double
get_index_only_read_time
(
PARAM
*
param
,
ha_rows
records
,
int
keynr
)
{
double
read_time
;
uint
keys_per_block
=
(
param
->
table
->
file
->
block_size
/
2
/
(
param
->
table
->
key_info
[
keynr
].
key_length
+
param
->
table
->
file
->
ref_length
)
+
1
);
read_time
=
((
double
)
(
records
+
keys_per_block
-
1
)
/
(
double
)
keys_per_block
);
return
read_time
;
}
/*
Calculate quick range select read time, # of records, and best key to use
without constructing QUICK_RANGE_SELECT object.
SYNOPSIS
get_quick_select_params
tree in make range select for this SEL_TREE
param in parameters from test_quick_select
needed_reg in/out other table data needed by this quick_select
index_read_can_be_used if false, assume that 'index only' option is not
available.
read_time out read time estimate
records out # of records estimate
key_to_read out SEL_ARG to be used for creating quick select
*/
static
int
get_quick_select_params
(
SEL_TREE
*
tree
,
PARAM
&
param
,
key_map
&
needed_reg
,
TABLE
*
head
,
static
int
get_quick_select_params
(
SEL_TREE
*
tree
,
PARAM
*
param
,
key_map
&
needed_reg
,
bool
index_read_can_be_used
,
double
*
read_time
,
ha_rows
*
records
,
SEL_ARG
***
key_to_read
)
double
*
read_time
,
ha_rows
*
records
,
SEL_ARG
***
key_to_read
)
{
int
idx
;
int
result
=
1
;
...
...
@@ -1233,7 +1423,7 @@ static int get_quick_select_params(SEL_TREE *tree, PARAM& param,
*/
SEL_ARG
**
key
,
**
end
;
for
(
idx
=
0
,
key
=
tree
->
keys
,
end
=
key
+
param
.
keys
;
for
(
idx
=
0
,
key
=
tree
->
keys
,
end
=
key
+
param
->
keys
;
key
!=
end
;
key
++
,
idx
++
)
{
...
...
@@ -1241,16 +1431,18 @@ static int get_quick_select_params(SEL_TREE *tree, PARAM& param,
double
found_read_time
;
if
(
*
key
)
{
uint
keynr
=
param
.
real_keynr
[
idx
];
uint
keynr
=
param
->
real_keynr
[
idx
];
if
((
*
key
)
->
type
==
SEL_ARG
::
MAYBE_KEY
||
(
*
key
)
->
maybe_flag
)
needed_reg
.
set_bit
(
keynr
);
bool
read_index_only
=
index_read_can_be_used
?
head
->
used_keys
.
is_set
(
keynr
)
:
false
;
found_records
=
check_quick_select
(
&
param
,
idx
,
*
key
);
bool
read_index_only
=
index_read_can_be_used
?
param
->
table
->
used_keys
.
is_set
(
keynr
)
:
false
;
found_records
=
check_quick_select
(
param
,
idx
,
*
key
);
if
(
found_records
!=
HA_POS_ERROR
&&
found_records
>
2
&&
read_index_only
&&
(
head
->
file
->
index_flags
(
keynr
)
&
HA_KEY_READ_ONLY
))
(
param
->
table
->
file
->
index_flags
(
keynr
)
&
HA_KEY_READ_ONLY
))
{
/*
We can resolve this by only reading through this key.
...
...
@@ -1258,15 +1450,11 @@ static int get_quick_select_params(SEL_TREE *tree, PARAM& param,
and that all key blocks are half full (normally things are
much better).
*/
uint
keys_per_block
=
(
head
->
file
->
block_size
/
2
/
(
head
->
key_info
[
keynr
].
key_length
+
head
->
file
->
ref_length
)
+
1
);
found_read_time
=
((
double
)
(
found_records
+
keys_per_block
-
1
)
/
(
double
)
keys_per_block
);
found_read_time
=
get_index_only_read_time
(
param
,
found_records
,
keynr
);
}
else
found_read_time
=
(
head
->
file
->
read_time
(
keynr
,
param
.
range_count
,
found_read_time
=
(
param
->
table
->
file
->
read_time
(
keynr
,
param
->
range_count
,
found_records
)
+
(
double
)
found_records
/
TIME_FOR_COMPARE
);
if
(
*
read_time
>
found_read_time
&&
found_records
!=
HA_POS_ERROR
)
...
...
@@ -3118,8 +3306,8 @@ QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
/*
Fetch all row ids into unique.
If table has a clustered primary key
(PK) that contains all rows (bdb and
innodb currently) and one of the index_merge scans is a scan on primary key
,
If table has a clustered primary key
that covers all rows (true for bdb
and innodb currently) and one of the index_merge scans is a scan on PK
,
then
primary key scan rowids are not put into Unique and also
rows that will be retrieved by PK scan are not put into Unique
...
...
@@ -3134,15 +3322,15 @@ int QUICK_INDEX_MERGE_SELECT::prepare_unique()
int
result
;
DBUG_ENTER
(
"QUICK_INDEX_MERGE_SELECT::prepare_unique"
);
/*
w
e're going to just read rowids. */
/*
W
e're going to just read rowids. */
head
->
file
->
extra
(
HA_EXTRA_KEYREAD
);
/*
Make innodb retrieve all PK member fields, so
* ha_innobase::position (which uses them) call works.
*
we filter out rows retrieved by CC
PK.
*
We can filter out rows that will be retrieved by clustered
PK.
(This also creates a deficiency - it is possible that we will retrieve
parts of key that are not used by current query at all)
parts of key that are not used by current query at all
.
)
*/
head
->
file
->
extra
(
HA_EXTRA_RETRIEVE_ALL_COLS
);
...
...
@@ -3170,22 +3358,15 @@ int QUICK_INDEX_MERGE_SELECT::prepare_unique()
if
(
result
)
{
/*
table read error (including HA_ERR_END_OF_FILE on last quick select
in index_merge)
*/
if
(
result
!=
HA_ERR_END_OF_FILE
)
{
DBUG_RETURN
(
result
);
}
else
break
;
}
if
(
thd
->
killed
)
DBUG_RETURN
(
1
);
/* skip row if it will be retrieved by clustered
covering
PK scan */
/* skip row if it will be retrieved by clustered PK scan */
if
(
pk_quick_select
&&
pk_quick_select
->
row_in_ranges
())
continue
;
...
...
@@ -3207,14 +3388,16 @@ int QUICK_INDEX_MERGE_SELECT::prepare_unique()
DBUG_RETURN
(
result
);
}
/*
Get next row for index_merge.
NOTES
The rows are read from
1. rowids stored in Unique.
2. QUICK_RANGE_SELECT with clustered primary key (if any).
t
he sets of rows retrieved in 1) and 2) are guaranteed to be disjoint.
T
he sets of rows retrieved in 1) and 2) are guaranteed to be disjoint.
*/
int
QUICK_INDEX_MERGE_SELECT
::
get_next
()
{
int
result
;
...
...
@@ -3228,8 +3411,8 @@ int QUICK_INDEX_MERGE_SELECT::get_next()
if
(
result
==
-
1
)
{
result
=
HA_ERR_END_OF_FILE
;
/* All rows from Unique have been retrieved, do a CCPK scan */
end_read_record
(
&
read_record
);
/* All rows from Unique have been retrieved, do a clustered PK scan */
if
(
pk_quick_select
)
{
doing_pk_scan
=
true
;
...
...
@@ -3275,7 +3458,8 @@ int QUICK_RANGE_SELECT::get_next()
if
(
!
cur_range
)
range
=
*
(
cur_range
=
(
QUICK_RANGE
**
)
ranges
.
buffer
);
else
range
=
(
cur_range
==
((
QUICK_RANGE
**
)
ranges
.
buffer
+
ranges
.
elements
-
1
))
?
range
=
(
cur_range
==
((
QUICK_RANGE
**
)
ranges
.
buffer
+
ranges
.
elements
-
1
))
?
NULL:
*
(
++
cur_range
);
if
(
!
range
)
...
...
@@ -3371,16 +3555,17 @@ int QUICK_RANGE_SELECT::cmp_next(QUICK_RANGE *range_arg)
/*
Check if current row will be retrieved by this QUICK_RANGE_SELECT
(this is used to filter out CCPK scan rows in index_merge).
NOTES
It is assumed that currently a scan is being done on another index
which reads all necessary parts of the index that is scanned by this
quick select.
The implementation does a binary search on sorted array of disjoint
ranges, without taking size of range into account.
This function is used to filter out clustered PK scan rows in
index_merge quick select.
RETURN
true if current row will be retrieved by this quick select
false if not
...
...
sql/opt_range.h
View file @
7dbdedcb
...
...
@@ -118,11 +118,13 @@ class QUICK_RANGE_SELECT : public QUICK_SELECT_I
protected:
friend
void
print_quick_sel_range
(
QUICK_RANGE_SELECT
*
quick
,
const
key_map
*
needed_reg
);
friend
QUICK_RANGE_SELECT
*
get_quick_select_for_ref
(
THD
*
thd
,
TABLE
*
table
,
friend
QUICK_RANGE_SELECT
*
get_quick_select_for_ref
(
THD
*
thd
,
TABLE
*
table
,
struct
st_table_ref
*
ref
);
friend
bool
get_quick_keys
(
struct
st_qsel_param
*
param
,
QUICK_RANGE_SELECT
*
quick
,
KEY_PART
*
key
,
SEL_ARG
*
key_tree
,
char
*
min_key
,
uint
min_key_flag
,
SEL_ARG
*
key_tree
,
char
*
min_key
,
uint
min_key_flag
,
char
*
max_key
,
uint
max_key_flag
);
friend
QUICK_RANGE_SELECT
*
get_quick_select
(
struct
st_qsel_param
*
,
uint
idx
,
SEL_ARG
*
key_tree
,
...
...
@@ -160,58 +162,62 @@ class QUICK_RANGE_SELECT : public QUICK_SELECT_I
/*
QUICK_INDEX_MERGE_SELECT - index_merge acce
s method quick select.
QUICK_INDEX_MERGE_SELECT - index_merge acces
s method quick select.
QUICK_INDEX_MERGE_SELECT uses
* QUICK_RANGE_SELECTs to get rows
* Unique class to remove duplicate rows
INDEX MERGE OPTIMIZER
Current implementation doesn't detect all cases where index_merge could be
used, in particular:
* index_merge will never be used if range scan is possible (even if range
scan is more expensive)
INDEX MERGE OPTIMIZER
Current implementation doesn't detect all cases where index_merge could
be
used, in particular:
* index_merge will never be used if range scan is possible (even if
range
scan is more expensive)
* index_merge+'using index' is not supported (this the consequence of the
above restriction)
* index_merge+'using index' is not supported (this the consequence of
the
above restriction)
* If WHERE part contains complex nested AND and OR conditions, some ways to
retrieve rows using index_merge will not be considered. The choice of
read plan may depend on the order of conjuncts/disjuncts in WHERE part of
the query, see comments near SEL_IMERGE::or_sel_tree_with_checks and
imerge_list_or_list function
for details.
* If WHERE part contains complex nested AND and OR conditions, some ways
to retrieve rows using index_merge will not be considered. The choice
of read plan may depend on the order of conjuncts/disjuncts in WHERE
part of the query, see comments near imerge_list_or_list and
SEL_IMERGE::or_sel_tree_with_checks functions
for details.
* there is no "index_merge_ref" method (but index_merge on non-first table
in join is possible with 'range checked for each record').
* There is no "index_merge_ref" method (but index_merge on non-first
table
in join is possible with 'range checked for each record').
See comments around SEL_IMERGE class and test_quick_select for more details.
See comments around SEL_IMERGE class and test_quick_select for more
details.
ROW RETRIEVAL ALGORITHM
ROW RETRIEVAL ALGORITHM
index_merge uses Unique class for duplicates removal. Index merge takes
advantage of clustered covering primary key (C
CPK) if the table has one.
The algorithm is as follow
s:
index_merge uses Unique class for duplicates removal. index_merge takes
advantage of Clustered Primary Key (
CPK) if the table has one.
The index_merge algorithm consists of two phase
s:
prepare() //implemented in QUICK_INDEX_MERGE_SELECT::prepare_unique
Phase 1 (implemented in QUICK_INDEX_MERGE_SELECT::prepare_unique):
prepare()
{
activate 'index only';
while(retrieve next row for non-C
CPK scan)
while(retrieve next row for non-
CPK scan)
{
if (there is a C
CPK scan and row will be retrieved by it)
if (there is a
CPK scan and row will be retrieved by it)
skip this row;
else
put
rowid into Unique;
put its
rowid into Unique;
}
deactivate 'index only';
}
fetch() //implemented as sequence of QUICK_INDEX_MERGE_SELECT::get_next calls
Phase 2 (implemented as sequence of QUICK_INDEX_MERGE_SELECT::get_next
calls):
fetch()
{
retrieve all rows from row pointers stored in Unique;
free Unique;
retrieve all rows for C
CPK scan;
retrieve all rows for
CPK scan;
}
*/
class
QUICK_INDEX_MERGE_SELECT
:
public
QUICK_SELECT_I
...
...
@@ -239,10 +245,10 @@ class QUICK_INDEX_MERGE_SELECT : public QUICK_SELECT_I
/* last element in quick_selects list */
QUICK_RANGE_SELECT
*
last_quick_select
;
/* quick select that uses
Covering Clustered Primary K
ey (NULL if none) */
/* quick select that uses
clustered primary k
ey (NULL if none) */
QUICK_RANGE_SELECT
*
pk_quick_select
;
/* true if this select is currently doing a
CC
PK scan */
/* true if this select is currently doing a
clustered
PK scan */
bool
doing_pk_scan
;
Unique
*
unique
;
...
...
sql/records.cc
View file @
7dbdedcb
...
...
@@ -98,7 +98,6 @@ void init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
}
}
else
if
(
select
&&
select
->
quick
)
//&& (select->quick->get_type() != QUICK_SELECT_I::QS_TYPE_INDEX_MERGE))
{
DBUG_PRINT
(
"info"
,(
"using rr_quick"
));
info
->
read_record
=
rr_quick
;
...
...
sql/sql_class.h
View file @
7dbdedcb
...
...
@@ -1233,7 +1233,8 @@ class Unique :public Sql_alloc
}
bool
get
(
TABLE
*
table
);
static
double
get_use_cost
(
MEM_ROOT
*
alloc
,
uint
nkeys
,
uint
key_size
,
ulong
max_in_memory_size
);
friend
int
unique_write_to_file
(
gptr
key
,
element_count
count
,
Unique
*
unique
);
friend
int
unique_write_to_ptrs
(
gptr
key
,
element_count
count
,
Unique
*
unique
);
};
...
...
sql/uniques.cc
View file @
7dbdedcb
...
...
@@ -63,12 +63,194 @@ Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg,
comp_func_fixed_arg
);
/* If the following fail's the next add will also fail */
my_init_dynamic_array
(
&
file_ptrs
,
sizeof
(
BUFFPEK
),
16
,
16
);
/*
If you change the following, change it in get_max_elements function, too.
*/
max_elements
=
max_in_memory_size
/
ALIGN_SIZE
(
sizeof
(
TREE_ELEMENT
)
+
size
);
open_cached_file
(
&
file
,
mysql_tmpdir
,
TEMP_PREFIX
,
DISK_BUFFER_SIZE
,
MYF
(
MY_WME
));
}
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
#define M_E (exp(1))
inline
double
log2_n_fact
(
double
x
)
{
return
(
2
*
(
((
x
)
+
1
)
*
log
(((
x
)
+
1
)
/
M_E
)
+
log
(
2
*
M_PI
*
((
x
)
+
1
))
/
2
)
/
log
(
2
));
}
/*
Calculate cost of merge_buffers call.
NOTE
See comment near Unique::get_use_cost for cost formula derivation.
*/
static
double
get_merge_buffers_cost
(
uint
*
buff_sizes
,
uint
elem_size
,
int
last
,
int
f
,
int
t
)
{
uint
sum
=
0
;
for
(
int
i
=
f
;
i
<=
t
;
i
++
)
sum
+=
buff_sizes
[
i
];
buff_sizes
[
last
]
=
sum
;
int
n_buffers
=
t
-
f
+
1
;
double
buf_length
=
sum
*
elem_size
;
return
(((
double
)
buf_length
/
(
n_buffers
+
1
))
/
IO_SIZE
)
*
2
*
n_buffers
+
buf_length
*
log
(
n_buffers
)
/
(
TIME_FOR_COMPARE_ROWID
*
log
(
2.0
));
}
/*
Calculate cost of merging buffers into one in Unique::get, i.e. calculate
how long (in terms of disk seeks) the two call
merge_many_buffs(...);
merge_buffers(...);
will take.
SYNOPSIS
get_merge_many_buffs_cost()
alloc memory pool to use
maxbuffer # of full buffers.
max_n_elems # of elements in first maxbuffer buffers.
last_n_elems # of elements in last buffer.
elem_size size of buffer element.
NOTES
It is assumed that maxbuffer+1 buffers are merged, first maxbuffer buffers
contain max_n_elems each, last buffer contains last_n_elems elements.
The current implementation does a dumb simulation of merge_many_buffs
actions.
RETURN
>=0 Cost of merge in disk seeks.
<0 Out of memory.
*/
static
double
get_merge_many_buffs_cost
(
MEM_ROOT
*
alloc
,
uint
maxbuffer
,
uint
max_n_elems
,
uint
last_n_elems
,
int
elem_size
)
{
register
int
i
;
double
total_cost
=
0.0
;
int
lastbuff
;
uint
*
buff_sizes
;
if
(
!
(
buff_sizes
=
(
uint
*
)
alloc_root
(
alloc
,
sizeof
(
uint
)
*
(
maxbuffer
+
1
))))
return
-
1.0
;
for
(
i
=
0
;
i
<
(
int
)
maxbuffer
;
i
++
)
buff_sizes
[
i
]
=
max_n_elems
;
buff_sizes
[
maxbuffer
]
=
last_n_elems
;
if
(
maxbuffer
>=
MERGEBUFF2
)
{
/* Simulate merge_many_buff */
while
(
maxbuffer
>=
MERGEBUFF2
)
{
lastbuff
=
0
;
for
(
i
=
0
;
i
<=
(
int
)
maxbuffer
-
MERGEBUFF
*
3
/
2
;
i
+=
MERGEBUFF
)
total_cost
+=
get_merge_buffers_cost
(
buff_sizes
,
elem_size
,
lastbuff
++
,
i
,
i
+
MERGEBUFF
-
1
);
total_cost
+=
get_merge_buffers_cost
(
buff_sizes
,
elem_size
,
lastbuff
++
,
i
,
maxbuffer
);
maxbuffer
=
(
uint
)
lastbuff
-
1
;
}
}
/* Simulate final merge_buff call. */
total_cost
+=
get_merge_buffers_cost
(
buff_sizes
,
elem_size
,
0
,
0
,
maxbuffer
);
return
total_cost
;
}
/*
Calclulate cost of using Unique for processing nkeys elements of size
key_size using max_in_memory_size memory.
RETURN
Use cost as # of disk seeks.
NOTES
cost(using_unqiue) =
cost(create_trees) + (see #1)
cost(merge) + (see #2)
cost(read_result) (see #3)
1. Cost of trees creation
For each Unique::put operation there will be 2*log2(n+1) elements
comparisons, where n runs from 1 tree_size (we assume that all added
elements are different). Together this gives:
n_compares = 2*(log2(2) + log2(3) + ... + log2(N+1)) = 2*log2((N+1)!) =
= 2*ln((N+1)!) / ln(2) = {using Stirling formula} =
= 2*( (N+1)*ln((N+1)/e) + (1/2)*ln(2*pi*(N+1)) / ln(2).
then cost(tree_creation) = n_compares*ROWID_COMPARE_COST;
Total cost of creating trees:
(n_trees - 1)*max_size_tree_cost + non_max_size_tree_cost.
2. Cost of merging.
If only one tree is created by Unique no merging will be necessary.
Otherwise, we model execution of merge_many_buff function and count
#of merges. (The reason behind this is that number of buffers is small,
while size of buffers is big and we don't want to loose precision with
O(x)-style formula)
3. If only one tree is created by Unique no disk io will happen.
Otherwise, ceil(key_len*n_keys) disk seeks are necessary. We assume
these will be random seeks.
*/
double
Unique
::
get_use_cost
(
MEM_ROOT
*
alloc
,
uint
nkeys
,
uint
key_size
,
ulong
max_in_memory_size
)
{
ulong
max_elements_in_tree
;
ulong
last_tree_elems
;
int
n_full_trees
;
/* number of trees in unique - 1 */
double
result
;
max_elements_in_tree
=
max_in_memory_size
/
ALIGN_SIZE
(
sizeof
(
TREE_ELEMENT
)
+
key_size
);
n_full_trees
=
nkeys
/
max_elements_in_tree
;
last_tree_elems
=
nkeys
%
max_elements_in_tree
;
/* Calculate cost of creating trees */
result
=
log2_n_fact
(
last_tree_elems
);
if
(
n_full_trees
)
result
+=
n_full_trees
*
log2_n_fact
(
max_elements_in_tree
);
result
/=
TIME_FOR_COMPARE_ROWID
;
/* Calculate cost of merging */
if
(
!
n_full_trees
)
return
result
;
/* There is more then one tree and merging is necessary. */
/* Add cost of writing all trees to disk. */
result
+=
n_full_trees
*
ceil
(
key_size
*
max_elements_in_tree
/
IO_SIZE
);
result
+=
ceil
(
key_size
*
last_tree_elems
/
IO_SIZE
);
/* Cost of merge */
result
+=
get_merge_many_buffs_cost
(
alloc
,
n_full_trees
,
max_elements_in_tree
,
last_tree_elems
,
key_size
);
/*
Add cost of reading the resulting sequence, assuming there were no
duplicate elements.
*/
result
+=
ceil
((
double
)
key_size
*
nkeys
/
IO_SIZE
);
return
result
;
}
Unique
::~
Unique
()
{
close_cached_file
(
&
file
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment