Commit a4a1bb0e authored by unknown's avatar unknown

WL#2985 "Partition Pruning": post-review fixes:

- Added more comments.
- Added a RANGE_OPT_PARAM::remove_jump_scans flag that disables construction of index_merge
  SEL_TREEs that represent unusable conditions like "key1part1<c1 OR key2part2<c2"
- make prune_partitions() function handle the case where range analysis produces a list of 
  index_merge trees (it turned out that this is possible, appropriate test case added).
- Other small fixes.


mysql-test/r/partition_pruning.result:
  WL#2985 "Partition Pruning": post-review fixes: more test cases
mysql-test/t/partition_pruning.test:
  WL#2985 "Partition Pruning": post-review fixes: more test cases
sql/opt_range.cc:
  WL#2985 "Partition Pruning": post-review fixes:
  - Added more comments.
  - Fix the debug printouts
  - Added a RANGE_OPT_PARAM::remove_jump_scans flag that disables construction of index_merge
    SEL_TREEs that represent unusable conditions like "key1part1<c1 OR key2part2<c2"
  - make prune_partitions() function handle the case where range analysis produces a list of 
    index_merge trees (it turned out that this is possible, appropriate test case added).
sql/sql_partition.cc:
  WL#2985 "Partition Pruning": post-review fixes: make requested edits in comments.
sql/table.h:
  WL#2985 "Partition Pruning": post-review fixes: added bool TABLE::no_partitions_used
  (this change was missed when making the original cset)
parent f19fb870
...@@ -213,3 +213,48 @@ explain partitions select * from t1 where a='b'; ...@@ -213,3 +213,48 @@ explain partitions select * from t1 where a='b';
id select_type table partitions type possible_keys key key_len ref rows Extra id select_type table partitions type possible_keys key key_len ref rows Extra
1 SIMPLE t1 p0,p1 ALL NULL NULL NULL NULL 3 Using where 1 SIMPLE t1 p0,p1 ALL NULL NULL NULL NULL 3 Using where
drop table t1; drop table t1;
create table t1 (
a1 int not null
)
partition by range (a1) (
partition p0 values less than (3),
partition p1 values less than (6),
partition p2 values less than (9)
);
insert into t1 values (1),(2),(3);
explain partitions select * from t1 where a1 > 3;
id select_type table partitions type possible_keys key key_len ref rows Extra
1 SIMPLE t1 p1,p2 ALL NULL NULL NULL NULL 3 Using where
explain partitions select * from t1 where a1 >= 3;
id select_type table partitions type possible_keys key key_len ref rows Extra
1 SIMPLE t1 p1,p2 ALL NULL NULL NULL NULL 3 Using where
explain partitions select * from t1 where a1 < 3 and a1 > 3;
id select_type table partitions type possible_keys key key_len ref rows Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
drop table t1;
create table t3 (a int, b int)
partition by list(a) subpartition by hash(b) subpartitions 4 (
partition p0 values in (1),
partition p1 values in (2),
partition p2 values in (3),
partition p3 values in (4)
);
insert into t3 values (1,1),(2,2),(3,3);
explain partitions select * from t3 where a=2 or b=1;
id select_type table partitions type possible_keys key key_len ref rows Extra
1 SIMPLE t3 p0_sp1,p1_sp0,p1_sp1,p1_sp2,p1_sp3,p2_sp1,p3_sp1 ALL NULL NULL NULL NULL 3 Using where
explain partitions select * from t3 where a=4 or b=2;
id select_type table partitions type possible_keys key key_len ref rows Extra
1 SIMPLE t3 p0_sp2,p1_sp2,p2_sp2,p3_sp0,p3_sp1,p3_sp2,p3_sp3 ALL NULL NULL NULL NULL 3 Using where
explain partitions select * from t3 where (a=2 or b=1) and (a=4 or b=2) ;
id select_type table partitions type possible_keys key key_len ref rows Extra
1 SIMPLE t3 p1_sp2,p3_sp1 ALL NULL NULL NULL NULL 3 Using where
create table t1 (a int) partition by hash(a) partitions 2;
insert into t1 values (1),(2);
explain partitions select * from t1 where a is null;
id select_type table partitions type possible_keys key key_len ref rows Extra
1 SIMPLE t1 p0 ALL NULL NULL NULL NULL 2 Using where
explain partitions select * from t1 where a is not null;
id select_type table partitions type possible_keys key key_len ref rows Extra
1 SIMPLE t1 p0,p1 ALL NULL NULL NULL NULL 2 Using where
drop table t1;
...@@ -192,4 +192,46 @@ insert into t1 values ('a'),('b'),('c'); ...@@ -192,4 +192,46 @@ insert into t1 values ('a'),('b'),('c');
explain partitions select * from t1 where a='b'; explain partitions select * from t1 where a='b';
drop table t1; drop table t1;
#
# Test cases for bugs found in code review:
#
create table t1 (
a1 int not null
)
partition by range (a1) (
partition p0 values less than (3),
partition p1 values less than (6),
partition p2 values less than (9)
);
insert into t1 values (1),(2),(3);
explain partitions select * from t1 where a1 > 3;
explain partitions select * from t1 where a1 >= 3;
explain partitions select * from t1 where a1 < 3 and a1 > 3;
drop table t1;
#
create table t3 (a int, b int)
partition by list(a) subpartition by hash(b) subpartitions 4 (
partition p0 values in (1),
partition p1 values in (2),
partition p2 values in (3),
partition p3 values in (4)
);
insert into t3 values (1,1),(2,2),(3,3);
explain partitions select * from t3 where a=2 or b=1;
explain partitions select * from t3 where a=4 or b=2;
explain partitions select * from t3 where (a=2 or b=1) and (a=4 or b=2) ;
# Test for NULLs
create table t1 (a int) partition by hash(a) partitions 2;
insert into t1 values (1),(2);
explain partitions select * from t1 where a is null;
# this selects both
explain partitions select * from t1 where a is not null;
drop table t1;
# No tests for NULLs in RANGE(monotonic_expr()) - they depend on BUG#15447
# being fixed.
...@@ -24,16 +24,42 @@ ...@@ -24,16 +24,42 @@
*/ */
/* /*
Classes in this file are used in the following way: This file contains:
1. For a selection condition a tree of SEL_IMERGE/SEL_TREE/SEL_ARG objects
is created. #of rows in table and index statistics are ignored at this RangeAnalysisModule
step. A module that accepts a condition, index (or partitioning) description,
2. Created SEL_TREE and index stats data are used to construct a and builds lists of intervals (in index/partitioning space), such that
TABLE_READ_PLAN-derived object (TRP_*). Several 'candidate' table read all possible records that match the condition are contained within the
plans may be created. intervals.
3. The least expensive table read plan is used to create a tree of The entry point for the range analysis module is get_mm_tree() function.
QUICK_SELECT_I-derived objects which are later used for row retrieval.
QUICK_RANGEs are also created in this step. The lists are returned in form of complicated structure of interlinked
SEL_TREE/SEL_IMERGE/SEL_ARG objects.
See check_quick_keys, find_used_partitions for examples of how to walk
this structure.
All direct "users" of this module are located within this file, too.
PartitionPruningModule
A module that accepts a partitioned table, condition, and finds which
partitions we will need to use in query execution. Search down for
"PartitionPruningModule" for description.
The module has single entry point - prune_partitions() function.
Range/index_merge/groupby-minmax optimizer module
A module that accepts a table, condition, and returns
- a QUICK_*_SELECT object that can be used to retrieve rows that match
the specified condition, or a "no records will match the condition"
statement.
The module entry points are
test_quick_select()
get_quick_select_for_ref()
Record retrieval code for range/index_merge/groupby-min-max.
Implementations of QUICK_*_SELECT classes.
*/ */
#ifdef USE_PRAGMA_IMPLEMENTATION #ifdef USE_PRAGMA_IMPLEMENTATION
...@@ -301,6 +327,11 @@ class SEL_IMERGE; ...@@ -301,6 +327,11 @@ class SEL_IMERGE;
class SEL_TREE :public Sql_alloc class SEL_TREE :public Sql_alloc
{ {
public: public:
/*
Starting an effort to document this field:
(for some i, keys[i]->type == SEL_ARG::IMPOSSIBLE) =>
(type == SEL_TREE::IMPOSSIBLE)
*/
enum Type { IMPOSSIBLE, ALWAYS, MAYBE, KEY, KEY_SMALLER } type; enum Type { IMPOSSIBLE, ALWAYS, MAYBE, KEY, KEY_SMALLER } type;
SEL_TREE(enum Type type_arg) :type(type_arg) {} SEL_TREE(enum Type type_arg) :type(type_arg) {}
SEL_TREE() :type(KEY) SEL_TREE() :type(KEY)
...@@ -354,6 +385,8 @@ public: ...@@ -354,6 +385,8 @@ public:
*/ */
bool using_real_indexes; bool using_real_indexes;
bool remove_jump_scans;
/* /*
used_key_no -> table_key_no translation table. Only makes sense if used_key_no -> table_key_no translation table. Only makes sense if
using_real_indexes==TRUE using_real_indexes==TRUE
...@@ -1810,6 +1843,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, ...@@ -1810,6 +1843,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
param.needed_reg= &needed_reg; param.needed_reg= &needed_reg;
param.imerge_cost_buff_size= 0; param.imerge_cost_buff_size= 0;
param.using_real_indexes= TRUE; param.using_real_indexes= TRUE;
param.remove_jump_scans= TRUE;
thd->no_errors=1; // Don't warn about NULL thd->no_errors=1; // Don't warn about NULL
init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0); init_sql_alloc(&alloc, thd->variables.range_alloc_block_size, 0);
...@@ -2005,6 +2039,87 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, ...@@ -2005,6 +2039,87 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
****************************************************************************/ ****************************************************************************/
#ifdef WITH_PARTITION_STORAGE_ENGINE #ifdef WITH_PARTITION_STORAGE_ENGINE
/*
PartitionPruningModule
This part of the code does partition pruning. Partition pruning solves the
following problem: given a query over partitioned tables, find partitions
that we will not need to access (i.e. partitions that we can assume to be
empty) when executing the query.
The set of partitions to prune doesn't depend on which query execution
plan will be used to execute the query.
HOW IT WORKS
Partition pruning module makes use of RangeAnalysisModule. The following
examples show how the problem of partition pruning can be reduced to the
range analysis problem:
EXAMPLE 1
Consider a query:
SELECT * FROM t1 WHERE (t1.a < 5 OR t1.a = 10) AND t1.a > 3 AND t1.b='z'
where table t1 is partitioned using PARTITION BY RANGE(t1.a). An apparent
way to find the used (i.e. not pruned away) partitions is as follows:
1. analyze the WHERE clause and extract the list of intervals over t1.a
for the above query we will get this list: {(3 < t1.a < 5), (t1.a=10)}
2. for each interval I
{
find partitions that have non-empty intersection with I;
mark them as used;
}
EXAMPLE 2
Suppose the table is partitioned by HASH(part_func(t1.a, t1.b)). Then
we need to:
1. Analyze the WHERE clause and get a list of intervals over (t1.a, t1.b).
The list of intervals we'll obtain will look like this:
((t1.a, t1.b) = (1,'foo')),
((t1.a, t1.b) = (2,'bar')),
((t1,a, t1.b) > (10,'zz')) (**)
2. for each interval I
{
if (the interval has form "(t1.a, t1.b) = (const1, const2)" )
{
calculate HASH(part_func(t1.a, t1.b));
find which partition has records with this hash value and mark
it as used;
}
else
{
mark all partitions as used;
break;
}
}
For both examples the step #1 is exactly what RangeAnalysisModule could
be used to do, if it was provided with appropriate index description
(array of KEY_PART structures).
In example #1, we need to provide it with description of index(t1.a),
in example #2, we need to provide it with description of index(t1.a, t1.b).
These index descriptions are further called "partitioning index
descriptions". Note that it doesn't matter if such indexes really exist,
as range analysis module only uses the description.
Putting it all together, partitioning module works as follows:
prune_partitions() {
call create_partition_index_descrition();
call get_mm_tree(); // invoke the RangeAnalysisModule
// analyze the obtained interval list and get used partitions
call find_used_partitions();
}
*/
struct st_part_prune_param; struct st_part_prune_param;
struct st_part_opt_info; struct st_part_opt_info;
...@@ -2019,7 +2134,7 @@ typedef uint32 (*get_endpoint_func)(partition_info*, bool left_endpoint, ...@@ -2019,7 +2134,7 @@ typedef uint32 (*get_endpoint_func)(partition_info*, bool left_endpoint,
*/ */
typedef struct st_part_prune_param typedef struct st_part_prune_param
{ {
RANGE_OPT_PARAM range_param; /* Range optimizer parameters */ RANGE_OPT_PARAM range_param; /* Range analyzer parameters */
/*************************************************************** /***************************************************************
Following fields are filled in based solely on partitioning Following fields are filled in based solely on partitioning
...@@ -2096,18 +2211,30 @@ typedef struct st_part_prune_param ...@@ -2096,18 +2211,30 @@ typedef struct st_part_prune_param
/*************************************************************** /***************************************************************
Following fields are used to store an 'iterator' that can be Following fields are used to store an 'iterator' that can be
used to obtain a set of used of used partitions. used to obtain a set of used artitions.
**************************************************************/ **************************************************************/
/* /*
Start number, end number and Start and end+1 partition "numbers". They can have two meanings depending
depending of the value of part_num_to_part_id:
part_num_to_part_id_range - numbers are partition ids
part_num_to_part_id_list - numbers are indexes in part_info->list_array
*/ */
part_num_to_partition_id_func part_num_to_part_id;
uint32 start_part_num; uint32 start_part_num;
uint32 end_part_num; uint32 end_part_num;
/*
A function that should be used to convert two above "partition numbers"
to partition_ids.
*/
part_num_to_partition_id_func part_num_to_part_id;
} PART_PRUNE_PARAM; } PART_PRUNE_PARAM;
static bool create_partition_index_descrition(PART_PRUNE_PARAM *prune_par); static bool create_partition_index_descrition(PART_PRUNE_PARAM *prune_par);
static int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree); static int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree);
static int find_used_partitions_imerge(PART_PRUNE_PARAM *ppar,
SEL_IMERGE *imerge);
static int find_used_partitions_imerge_list(PART_PRUNE_PARAM *ppar,
List<SEL_IMERGE> &merges);
static void mark_all_partitions_as_used(partition_info *part_info); static void mark_all_partitions_as_used(partition_info *part_info);
static uint32 part_num_to_part_id_range(PART_PRUNE_PARAM* prune_par, static uint32 part_num_to_part_id_range(PART_PRUNE_PARAM* prune_par,
uint32 num); uint32 num);
...@@ -2185,6 +2312,7 @@ bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond) ...@@ -2185,6 +2312,7 @@ bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond)
range_par->keys= 1; // one index range_par->keys= 1; // one index
range_par->using_real_indexes= FALSE; range_par->using_real_indexes= FALSE;
range_par->remove_jump_scans= FALSE;
range_par->real_keynr[0]= 0; range_par->real_keynr[0]= 0;
thd->no_errors=1; // Don't warn about NULL thd->no_errors=1; // Don't warn about NULL
...@@ -2196,8 +2324,7 @@ bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond) ...@@ -2196,8 +2324,7 @@ bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond)
int res; int res;
tree= get_mm_tree(range_par, pprune_cond); tree= get_mm_tree(range_par, pprune_cond);
if (!tree || (tree->type != SEL_TREE::KEY && if (!tree)
tree->type != SEL_TREE::KEY_SMALLER))
goto all_used; goto all_used;
if (tree->type == SEL_TREE::IMPOSSIBLE) if (tree->type == SEL_TREE::IMPOSSIBLE)
...@@ -2206,6 +2333,9 @@ bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond) ...@@ -2206,6 +2333,9 @@ bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond)
goto end; goto end;
} }
if (tree->type != SEL_TREE::KEY && tree->type != SEL_TREE::KEY_SMALLER)
goto all_used;
if (tree->merges.is_empty()) if (tree->merges.is_empty())
{ {
prune_param.arg_stack_end= prune_param.arg_stack; prune_param.arg_stack_end= prune_param.arg_stack;
...@@ -2220,27 +2350,30 @@ bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond) ...@@ -2220,27 +2350,30 @@ bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond)
} }
else else
{ {
res= 0; if (tree->merges.elements == 1)
DBUG_ASSERT(tree->merges.elements == 1);
SEL_IMERGE *imerge= tree->merges.head();
for (SEL_TREE **ptree= imerge->trees; ptree < imerge->trees_next; ptree++)
{ {
prune_param.arg_stack_end= prune_param.arg_stack; if (-1 == (res |= find_used_partitions_imerge(&prune_param,
prune_param.cur_part_fields= 0; tree->merges.head())))
prune_param.cur_subpart_fields= 0; goto all_used;
prune_param.part_num_to_part_id= part_num_to_part_id_range; }
prune_param.start_part_num= 0; else
prune_param.end_part_num= prune_param.part_info->no_parts; {
if (-1 == (res |= find_used_partitions(&prune_param, (*ptree)->keys[0]))) if (-1 == (res |= find_used_partitions_imerge_list(&prune_param,
tree->merges)))
goto all_used; goto all_used;
} }
} }
if (!res) /*
retval= TRUE; res == 0 => no used partitions => retval=TRUE
res == 1 => some used partitions => retval=FALSE
res == -1 - we jump over this line to all_used:
*/
retval= test(!res);
goto end; goto end;
all_used: all_used:
retval= FALSE; // some partitions are used
mark_all_partitions_as_used(prune_param.part_info); mark_all_partitions_as_used(prune_param.part_info);
end: end:
thd->no_errors=0; thd->no_errors=0;
...@@ -2316,16 +2449,113 @@ static void mark_full_partition_used_with_parts(partition_info *part_info, ...@@ -2316,16 +2449,113 @@ static void mark_full_partition_used_with_parts(partition_info *part_info,
bitmap_set_bit(&part_info->used_partitions, start); bitmap_set_bit(&part_info->used_partitions, start);
} }
static /* See comment in PART_PRUNE_PARAM::part_num_to_part_id about what this is */
uint32 part_num_to_part_id_range(PART_PRUNE_PARAM* prune_par, uint32 num) static uint32 part_num_to_part_id_range(PART_PRUNE_PARAM* ppar, uint32 num)
{ {
return num; return num;
} }
/* See comment in PART_PRUNE_PARAM::part_num_to_part_id about what this is */
static uint32 part_num_to_part_id_list(PART_PRUNE_PARAM* ppar, uint32 num)
{
return ppar->part_info->list_array[num].partition_id;
}
/*
Find the set of used partitions for List<SEL_IMERGE>
SYNOPSIS
find_used_partitions_imerge_list
ppar Partition pruning context.
key_tree Intervals tree to perform pruning for.
DESCRIPTION
List<SEL_IMERGE> represents "imerge1 AND imerge2 AND ...".
The set of used partitions is an intersection of used partitions sets
for imerge_{i}.
We accumulate this intersection a separate bitmap.
RETURN
See find_used_partitions()
*/
static int find_used_partitions_imerge_list(PART_PRUNE_PARAM *ppar,
List<SEL_IMERGE> &merges)
{
MY_BITMAP all_merges;
uint bitmap_bytes;
uint32 *bitmap_buf;
uint n_bits= ppar->part_info->used_partitions.n_bits;
bitmap_bytes= bitmap_buffer_size(n_bits);
if (!(bitmap_buf= (uint32*)alloc_root(ppar->range_param.mem_root,
bitmap_bytes)))
{
/*
Fallback, process just first SEL_IMERGE. This can leave us with more
partitions marked as used then actually needed.
*/
return find_used_partitions_imerge(ppar, merges.head());
}
bitmap_init(&all_merges, bitmap_buf, n_bits, FALSE);
bitmap_set_prefix(&all_merges, n_bits);
List_iterator<SEL_IMERGE> it(merges);
SEL_IMERGE *imerge;
while ((imerge=it++))
{
int res= find_used_partitions_imerge(ppar, imerge);
if (!res)
{
/* no used partitions on one ANDed imerge => no used partitions at all */
return 0;
}
if (res != -1)
bitmap_intersect(&all_merges, &ppar->part_info->used_partitions);
if (bitmap_is_clear_all(&all_merges))
return 0;
bitmap_clear_all(&ppar->part_info->used_partitions);
}
memcpy(ppar->part_info->used_partitions.bitmap, all_merges.bitmap,
bitmap_bytes);
return 1;
}
/*
Find the set of used partitions for SEL_IMERGE structure
SYNOPSIS
find_used_partitions_imerge()
ppar Partition pruning context.
key_tree Intervals tree to perform pruning for.
DESCRIPTION
SEL_IMERGE represents "tree1 OR tree2 OR ...". The implementation is
trivial - just use mark used partitions for each tree and bail out early
if for some tree_{i} all partitions are used.
RETURN
See find_used_partitions().
*/
static static
uint32 part_num_to_part_id_list(PART_PRUNE_PARAM* prune_par, uint32 num) int find_used_partitions_imerge(PART_PRUNE_PARAM *ppar, SEL_IMERGE *imerge)
{ {
return prune_par->part_info->list_array[num].partition_id; int res= 0;
for (SEL_TREE **ptree= imerge->trees; ptree < imerge->trees_next; ptree++)
{
ppar->arg_stack_end= ppar->arg_stack;
ppar->cur_part_fields= 0;
ppar->cur_subpart_fields= 0;
ppar->part_num_to_part_id= part_num_to_part_id_range;
ppar->start_part_num= 0;
ppar->end_part_num= ppar->part_info->no_parts;
if (-1 == (res |= find_used_partitions(ppar, (*ptree)->keys[0])))
return -1;
}
return res;
} }
...@@ -2370,8 +2600,8 @@ uint32 part_num_to_part_id_list(PART_PRUNE_PARAM* prune_par, uint32 num) ...@@ -2370,8 +2600,8 @@ uint32 part_num_to_part_id_list(PART_PRUNE_PARAM* prune_par, uint32 num)
1 OK, one or more [sub]partitions are marked as used. 1 OK, one or more [sub]partitions are marked as used.
0 The passed condition doesn't match any partitions 0 The passed condition doesn't match any partitions
-1 Couldn't infer any partition pruning "intervals" from the passed -1 Couldn't infer any partition pruning "intervals" from the passed
SEL_ARG* tree. Marking partitions as used is the responsibility of SEL_ARG* tree (which means that all partitions should be marked as
the caller. used) Marking partitions as used is the responsibility of the caller.
*/ */
static static
...@@ -2401,11 +2631,15 @@ int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree) ...@@ -2401,11 +2631,15 @@ int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree)
key_parts);); key_parts););
/* Find minimum */ /* Find minimum */
if (key_tree->min_flag & NO_MIN_RANGE) if (key_tree->min_flag & NO_MIN_RANGE)
{
ppar->start_part_num= 0; ppar->start_part_num= 0;
}
else else
{ {
/*
Store the interval edge in the record buffer, and call the
function that maps the edge in table-field space to an edge
in ordered-set-of-partitions (for RANGE partitioning) or
indexes-in-ordered-array-of-list-constants (for LIST) space.
*/
store_key_image_to_rec(key_tree->field, key_tree->min_value, store_key_image_to_rec(key_tree->field, key_tree->min_value,
ppar->range_param.key_parts[0].length); ppar->range_param.key_parts[0].length);
bool include_endp= ppar->force_include_bounds || bool include_endp= ppar->force_include_bounds ||
...@@ -2419,11 +2653,9 @@ int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree) ...@@ -2419,11 +2653,9 @@ int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree)
} }
} }
/* Find maximum */ /* Find maximum, do the same as above but for right interval bound */
if (key_tree->min_flag & NO_MAX_RANGE) if (key_tree->max_flag & NO_MAX_RANGE)
{
ppar->end_part_num= ppar->max_endpoint_val; ppar->end_part_num= ppar->max_endpoint_val;
}
else else
{ {
store_key_image_to_rec(key_tree->field, key_tree->max_value, store_key_image_to_rec(key_tree->field, key_tree->max_value,
...@@ -2507,7 +2739,6 @@ int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree) ...@@ -2507,7 +2739,6 @@ int find_used_partitions(PART_PRUNE_PARAM *ppar, SEL_ARG *key_tree)
bitmap_set_bit(&part_info->used_partitions, bitmap_set_bit(&part_info->used_partitions,
ppar->part_num_to_part_id(ppar, num) * ppar->part_num_to_part_id(ppar, num) *
part_info->no_subparts + subpart_id); part_info->no_subparts + subpart_id);
ppar->start_part_num++;
} }
res= 1; /* Some partitions were marked as used */ res= 1; /* Some partitions were marked as used */
goto pop_and_go_right; goto pop_and_go_right;
...@@ -2726,8 +2957,7 @@ static bool create_partition_index_descrition(PART_PRUNE_PARAM *ppar) ...@@ -2726,8 +2957,7 @@ static bool create_partition_index_descrition(PART_PRUNE_PARAM *ppar)
key_part->length= (*field)->pack_length_in_rec(); key_part->length= (*field)->pack_length_in_rec();
/* /*
psergey-todo: check yet again if this is correct for tricky field types, psergey-todo: check yet again if this is correct for tricky field types,
e.g. see "Fix a fatal error in decimal key handling" in e.g. see "Fix a fatal error in decimal key handling" in open_binary_frm()
open_binary_frm().
*/ */
key_part->store_length= (*field)->pack_length(); key_part->store_length= (*field)->pack_length();
if ((*field)->real_maybe_null()) if ((*field)->real_maybe_null())
...@@ -2768,7 +2998,7 @@ static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end) ...@@ -2768,7 +2998,7 @@ static void print_partitioning_index(KEY_PART *parts, KEY_PART *parts_end)
{ {
fprintf(DBUG_FILE, "%s%s", p==parts?"":" ,", p->field->field_name); fprintf(DBUG_FILE, "%s%s", p==parts?"":" ,", p->field->field_name);
} }
fprintf(DBUG_FILE, ");\n"); fputs(");\n", DBUG_FILE);
DBUG_UNLOCK_FILE; DBUG_UNLOCK_FILE;
DBUG_VOID_RETURN; DBUG_VOID_RETURN;
} }
...@@ -2780,7 +3010,9 @@ static void dbug_print_field(Field *field) ...@@ -2780,7 +3010,9 @@ static void dbug_print_field(Field *field)
fprintf(DBUG_FILE, "NULL"); fprintf(DBUG_FILE, "NULL");
else else
{ {
String str; char buf[256];
String str(buf, sizeof(buf), &my_charset_bin);
str.length(0);
String *pstr; String *pstr;
pstr= field->val_str(&str); pstr= field->val_str(&str);
fprintf(DBUG_FILE, "'%s'", pstr->c_ptr_safe()); fprintf(DBUG_FILE, "'%s'", pstr->c_ptr_safe());
...@@ -2795,7 +3027,7 @@ static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part) ...@@ -2795,7 +3027,7 @@ static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part)
DBUG_LOCK_FILE; DBUG_LOCK_FILE;
if (!(arg->min_flag & NO_MIN_RANGE)) if (!(arg->min_flag & NO_MIN_RANGE))
{ {
arg->field->set_key_image((char*)(arg->min_value), part->length); store_key_image_to_rec(part->field, (char*)(arg->min_value), part->length);
dbug_print_field(part->field); dbug_print_field(part->field);
if (arg->min_flag & NEAR_MIN) if (arg->min_flag & NEAR_MIN)
fputs(" < ", DBUG_FILE); fputs(" < ", DBUG_FILE);
...@@ -2811,9 +3043,10 @@ static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part) ...@@ -2811,9 +3043,10 @@ static void dbug_print_segment_range(SEL_ARG *arg, KEY_PART *part)
fputs(" < ", DBUG_FILE); fputs(" < ", DBUG_FILE);
else else
fputs(" <= ", DBUG_FILE); fputs(" <= ", DBUG_FILE);
arg->field->set_key_image((char*)(arg->max_value), part->length); store_key_image_to_rec(part->field, (char*)(arg->min_value), part->length);
dbug_print_field(part->field); dbug_print_field(part->field);
} }
fputs("\n", DBUG_FILE);
DBUG_UNLOCK_FILE; DBUG_UNLOCK_FILE;
DBUG_VOID_RETURN; DBUG_VOID_RETURN;
} }
...@@ -2837,12 +3070,14 @@ static void dbug_print_onepoint_range(SEL_ARG **start, uint num) ...@@ -2837,12 +3070,14 @@ static void dbug_print_onepoint_range(SEL_ARG **start, uint num)
DBUG_ENTER("dbug_print_onepoint_range"); DBUG_ENTER("dbug_print_onepoint_range");
DBUG_LOCK_FILE; DBUG_LOCK_FILE;
SEL_ARG **end= start + num; SEL_ARG **end= start + num;
for (SEL_ARG **arg= start; arg != end; arg++) for (SEL_ARG **arg= start; arg != end; arg++)
{ {
Field *field= (*arg)->field; Field *field= (*arg)->field;
fprintf(DBUG_FILE, "%s%s=", (arg==start)?"":", ", field->field_name); fprintf(DBUG_FILE, "%s%s=", (arg==start)?"":", ", field->field_name);
dbug_print_field(field); dbug_print_field(field);
} }
fputs("\n", DBUG_FILE);
DBUG_UNLOCK_FILE; DBUG_UNLOCK_FILE;
DBUG_VOID_RETURN; DBUG_VOID_RETURN;
} }
...@@ -5062,7 +5297,8 @@ tree_and(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2) ...@@ -5062,7 +5297,8 @@ tree_and(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
using index_merge. using index_merge.
*/ */
bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, RANGE_OPT_PARAM* param) bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2,
RANGE_OPT_PARAM* param)
{ {
key_map common_keys= tree1->keys_map; key_map common_keys= tree1->keys_map;
DBUG_ENTER("sel_trees_can_be_ored"); DBUG_ENTER("sel_trees_can_be_ored");
...@@ -5088,6 +5324,79 @@ bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, RANGE_OPT_PARAM* pa ...@@ -5088,6 +5324,79 @@ bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, RANGE_OPT_PARAM* pa
DBUG_RETURN(FALSE); DBUG_RETURN(FALSE);
} }
/*
Remove the trees that are not suitable for record retrieval.
SYNOPSIS
param Range analysis parameter
tree Tree to be processed, tree->type is KEY or KEY_SMALLER
DESCRIPTION
This function walks through tree->keys[] and removes the SEL_ARG* trees
that are not "maybe" trees (*) and cannot be used to construct quick range
selects.
(*) - have type MAYBE or MAYBE_KEY. Perhaps we should remove trees of
these types here as well.
A SEL_ARG* tree cannot be used to construct quick select if it has
tree->part != 0. (e.g. it could represent "keypart2 < const").
WHY THIS FUNCTION IS NEEDED
Normally we allow construction of SEL_TREE objects that have SEL_ARG
trees that do not allow quick range select construction. For example for
" keypart1=1 AND keypart2=2 " the execution will proceed as follows:
tree1= SEL_TREE { SEL_ARG{keypart1=1} }
tree2= SEL_TREE { SEL_ARG{keypart2=2} } -- can't make quick range select
from this
call tree_and(tree1, tree2) -- this joins SEL_ARGs into a usable SEL_ARG
tree.
There is an exception though: when we construct index_merge SEL_TREE,
any SEL_ARG* tree that cannot be used to construct quick range select can
be removed, because current range analysis code doesn't provide any way
that tree could be later combined with another tree.
Consider an example: we should not construct
st1 = SEL_TREE {
merges = SEL_IMERGE {
SEL_TREE(t.key1part1 = 1),
SEL_TREE(t.key2part2 = 2) -- (*)
}
};
because
- (*) cannot be used to construct quick range select,
- There is no execution path that would cause (*) to be converted to
a tree that could be used.
The latter is easy to verify: first, notice that the only way to convert
(*) into a usable tree is to call tree_and(something, (*)).
Second look at what tree_and/tree_or function would do when passed a
SEL_TREE that has the structure like st1 tree has, and conlcude that
tree_and(something, (*)) will not be called.
RETURN
0 Ok, some suitable trees left
1 No tree->keys[] left.
*/
static bool remove_nonrange_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree)
{
bool res= FALSE;
for (uint i=0; i < param->keys; i++)
{
if (tree->keys[i])
{
if (tree->keys[i]->part)
tree->keys[i]= NULL;
else
res= TRUE;
}
}
return res;
}
static SEL_TREE * static SEL_TREE *
tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2) tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
{ {
...@@ -5131,6 +5440,13 @@ tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2) ...@@ -5131,6 +5440,13 @@ tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
/* ok, two trees have KEY type but cannot be used without index merge */ /* ok, two trees have KEY type but cannot be used without index merge */
if (tree1->merges.is_empty() && tree2->merges.is_empty()) if (tree1->merges.is_empty() && tree2->merges.is_empty())
{ {
if (param->remove_jump_scans)
{
bool no_trees= remove_nonrange_trees(param, tree1);
no_trees= no_trees || remove_nonrange_trees(param, tree2);
if (no_trees)
DBUG_RETURN(new SEL_TREE(SEL_TREE::ALWAYS));
}
SEL_IMERGE *merge; SEL_IMERGE *merge;
/* both trees are "range" trees, produce new index merge structure */ /* both trees are "range" trees, produce new index merge structure */
if (!(result= new SEL_TREE()) || !(merge= new SEL_IMERGE()) || if (!(result= new SEL_TREE()) || !(merge= new SEL_IMERGE()) ||
...@@ -5154,6 +5470,8 @@ tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2) ...@@ -5154,6 +5470,8 @@ tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
if (tree1->merges.is_empty()) if (tree1->merges.is_empty())
swap_variables(SEL_TREE*, tree1, tree2); swap_variables(SEL_TREE*, tree1, tree2);
if (param->remove_jump_scans && remove_nonrange_trees(param, tree2))
DBUG_RETURN(new SEL_TREE(SEL_TREE::ALWAYS));
/* add tree2 to tree1->merges, checking if it collapses to ALWAYS */ /* add tree2 to tree1->merges, checking if it collapses to ALWAYS */
if (imerge_list_or_tree(param, &tree1->merges, tree2)) if (imerge_list_or_tree(param, &tree1->merges, tree2))
result= new SEL_TREE(SEL_TREE::ALWAYS); result= new SEL_TREE(SEL_TREE::ALWAYS);
......
...@@ -2493,8 +2493,9 @@ notfound: ...@@ -2493,8 +2493,9 @@ notfound:
DBUG_RETURN(TRUE); DBUG_RETURN(TRUE);
} }
/* /*
Find the part of part_info->list_array that corresponds to given interval Find the sub-array part_info->list_array that corresponds to given interval
SYNOPSIS SYNOPSIS
get_list_array_idx_for_endpoint() get_list_array_idx_for_endpoint()
...@@ -2504,17 +2505,16 @@ notfound: ...@@ -2504,17 +2505,16 @@ notfound:
include_endpoint TRUE iff the interval includes the endpoint include_endpoint TRUE iff the interval includes the endpoint
DESCRIPTION DESCRIPTION
This function finds the part of part_info->list_array where values of This function finds the sub-array of part_info->list_array where values of
list_array[idx].list_value are contained within the specifed interval. list_array[idx].list_value are contained within the specifed interval.
list_array is ordered by list_value, so list_array is ordered by list_value, so
1. For [a; +inf) or (a; +inf)-type intervals (left_endpoint==TRUE), the 1. For [a; +inf) or (a; +inf)-type intervals (left_endpoint==TRUE), the
sought array part starts at some index idx and continues till array sought sub-array starts at some index idx and continues till array end.
end.
The function returns first number idx, such that The function returns first number idx, such that
list_array[idx].list_value is contained within the passed interval. list_array[idx].list_value is contained within the passed interval.
2. For (-inf; a] or (-inf; a)-type intervals (left_endpoint==FALSE), the 2. For (-inf; a] or (-inf; a)-type intervals (left_endpoint==FALSE), the
sought array part starts at array start and continues till some last sought sub-array starts at array start and continues till some last
index idx. index idx.
The function returns first number idx, such that The function returns first number idx, such that
list_array[idx].list_value is NOT contained within the passed interval. list_array[idx].list_value is NOT contained within the passed interval.
...@@ -2522,14 +2522,14 @@ notfound: ...@@ -2522,14 +2522,14 @@ notfound:
returned. returned.
NOTE NOTE
The caller will call this function and then will run along the part of The caller will call this function and then will run along the sub-array of
list_array to collect partition ids. If the number of list values is list_array to collect partition ids. If the number of list values is
significantly higher then number of partitions, this could be slow and significantly higher then number of partitions, this could be slow and
we could invent some other approach. The "run over list array" part is we could invent some other approach. The "run over list array" part is
already wrapped in a get_next()-like function. already wrapped in a get_next()-like function.
RETURN RETURN
The edge of corresponding part_info->list_array part. The edge of corresponding sub-array of part_info->list_array
*/ */
uint32 get_list_array_idx_for_endpoint(partition_info *part_info, uint32 get_list_array_idx_for_endpoint(partition_info *part_info,
...@@ -2541,6 +2541,7 @@ uint32 get_list_array_idx_for_endpoint(partition_info *part_info, ...@@ -2541,6 +2541,7 @@ uint32 get_list_array_idx_for_endpoint(partition_info *part_info,
uint list_index; uint list_index;
longlong list_value; longlong list_value;
uint min_list_index= 0, max_list_index= part_info->no_list_values - 1; uint min_list_index= 0, max_list_index= part_info->no_list_values - 1;
/* Get the partitioning function value for the endpoint */
longlong part_func_value= part_info->part_expr->val_int(); longlong part_func_value= part_info->part_expr->val_int();
while (max_list_index >= min_list_index) while (max_list_index >= min_list_index)
{ {
...@@ -2596,7 +2597,7 @@ bool get_partition_id_range(partition_info *part_info, ...@@ -2596,7 +2597,7 @@ bool get_partition_id_range(partition_info *part_info,
/* /*
Find the part of part_info->range_int_array that covers the given interval Find the sub-array of part_info->range_int_array that covers given interval
SYNOPSIS SYNOPSIS
get_partition_id_range_for_endpoint() get_partition_id_range_for_endpoint()
...@@ -2607,7 +2608,7 @@ bool get_partition_id_range(partition_info *part_info, ...@@ -2607,7 +2608,7 @@ bool get_partition_id_range(partition_info *part_info,
interval interval
DESCRIPTION DESCRIPTION
This function finds the part of part_info->range_int_array where the This function finds the sub-array of part_info->range_int_array where the
elements have non-empty intersections with the given interval. elements have non-empty intersections with the given interval.
A range_int_array element at index idx represents the interval A range_int_array element at index idx represents the interval
...@@ -2617,14 +2618,13 @@ bool get_partition_id_range(partition_info *part_info, ...@@ -2617,14 +2618,13 @@ bool get_partition_id_range(partition_info *part_info,
intervals are disjoint and ordered by their right bound, so intervals are disjoint and ordered by their right bound, so
1. For [a; +inf) or (a; +inf)-type intervals (left_endpoint==TRUE), the 1. For [a; +inf) or (a; +inf)-type intervals (left_endpoint==TRUE), the
sought array part starts at some index idx and continues till array sought sub-array starts at some index idx and continues till array end.
end.
The function returns first number idx, such that the interval The function returns first number idx, such that the interval
represented by range_int_array[idx] has non empty intersection with represented by range_int_array[idx] has non empty intersection with
the passed interval. the passed interval.
2. For (-inf; a] or (-inf; a)-type intervals (left_endpoint==FALSE), the 2. For (-inf; a] or (-inf; a)-type intervals (left_endpoint==FALSE), the
sought array part starts at array start and continues till some last sought sub-array starts at array start and continues till some last
index idx. index idx.
The function returns first number idx, such that the interval The function returns first number idx, such that the interval
represented by range_int_array[idx] has EMPTY intersection with the represented by range_int_array[idx] has EMPTY intersection with the
...@@ -2634,7 +2634,7 @@ bool get_partition_id_range(partition_info *part_info, ...@@ -2634,7 +2634,7 @@ bool get_partition_id_range(partition_info *part_info,
returned. returned.
RETURN RETURN
The edge of corresponding part_info->range_int_array part. The edge of corresponding part_info->range_int_array sub-array.
*/ */
uint32 get_partition_id_range_for_endpoint(partition_info *part_info, uint32 get_partition_id_range_for_endpoint(partition_info *part_info,
...@@ -2645,6 +2645,7 @@ uint32 get_partition_id_range_for_endpoint(partition_info *part_info, ...@@ -2645,6 +2645,7 @@ uint32 get_partition_id_range_for_endpoint(partition_info *part_info,
longlong *range_array= part_info->range_int_array; longlong *range_array= part_info->range_int_array;
uint max_partition= part_info->no_parts - 1; uint max_partition= part_info->no_parts - 1;
uint min_part_id= 0, max_part_id= max_partition, loc_part_id; uint min_part_id= 0, max_part_id= max_partition, loc_part_id;
/* Get the partitioning function value for the endpoint */
longlong part_func_value= part_info->part_expr->val_int(); longlong part_func_value= part_info->part_expr->val_int();
while (max_part_id > min_part_id) while (max_part_id > min_part_id)
{ {
......
...@@ -297,6 +297,7 @@ struct st_table { ...@@ -297,6 +297,7 @@ struct st_table {
FILESORT_INFO sort; FILESORT_INFO sort;
#ifdef WITH_PARTITION_STORAGE_ENGINE #ifdef WITH_PARTITION_STORAGE_ENGINE
partition_info *part_info; /* Partition related information */ partition_info *part_info; /* Partition related information */
bool no_partitions_used; /* If true, all partitions have been pruned away */
#endif #endif
bool fill_item_list(List<Item> *item_list) const; bool fill_item_list(List<Item> *item_list) const;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment