Commit 0494d585 authored by Mattias Jonsson's avatar Mattias Jonsson

Bug#30573: Ordered range scan over partitioned tables returns some rows twice

and
Bug#33555: Group By Query does not correctly aggregate partitions

Backport of bug-33257 which is the same bug.

read_range_*() calls was not passed to the partition handlers,
but was translated to index_read/next family calls.
Resulting in duplicates rows and wrong aggregations.
parent cb8a39e7
...@@ -742,3 +742,23 @@ WHERE (a >= '2004-07-01' AND a <= '2004-09-30') OR ...@@ -742,3 +742,23 @@ WHERE (a >= '2004-07-01' AND a <= '2004-09-30') OR
id select_type table partitions type possible_keys key key_len ref rows Extra id select_type table partitions type possible_keys key key_len ref rows Extra
1 SIMPLE t1 p407,p408,p409,p507,p508,p509 ALL NULL NULL NULL NULL 18 Using where 1 SIMPLE t1 p407,p408,p409,p507,p508,p509 ALL NULL NULL NULL NULL 18 Using where
DROP TABLE t1; DROP TABLE t1;
create table t1 (a int);
insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
CREATE TABLE t2 (
defid int(10) unsigned NOT NULL,
day int(10) unsigned NOT NULL,
count int(10) unsigned NOT NULL,
filler char(200),
KEY (defid,day)
)
PARTITION BY RANGE (day) (
PARTITION p7 VALUES LESS THAN (20070401) ,
PARTITION p8 VALUES LESS THAN (20070501));
insert into t2 select 20, 20070311, 1, 'filler' from t1 A, t1 B;
insert into t2 select 20, 20070411, 1, 'filler' from t1 A, t1 B;
insert into t2 values(52, 20070321, 123, 'filler') ;
insert into t2 values(52, 20070322, 456, 'filler') ;
select sum(count) from t2 ch where ch.defid in (50,52) and ch.day between 20070320 and 20070401 group by defid;
sum(count)
579
drop table t1, t2;
...@@ -807,24 +807,24 @@ DROP TABLE t1; ...@@ -807,24 +807,24 @@ DROP TABLE t1;
# #
# BUG#30573: get wrong result with "group by" on PARTITIONed table # BUG#30573: get wrong result with "group by" on PARTITIONed table
# #
#create table t1 (a int); create table t1 (a int);
#insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
#CREATE TABLE t2 ( CREATE TABLE t2 (
# defid int(10) unsigned NOT NULL, defid int(10) unsigned NOT NULL,
# day int(10) unsigned NOT NULL, day int(10) unsigned NOT NULL,
# count int(10) unsigned NOT NULL, count int(10) unsigned NOT NULL,
# filler char(200), filler char(200),
# KEY (defid,day) KEY (defid,day)
#) )
#PARTITION BY RANGE (day) ( PARTITION BY RANGE (day) (
# PARTITION p7 VALUES LESS THAN (20070401) , PARTITION p7 VALUES LESS THAN (20070401) ,
# PARTITION p8 VALUES LESS THAN (20070501)); PARTITION p8 VALUES LESS THAN (20070501));
#insert into t2 select 20, 20070311, 1, 'filler' from t1 A, t1 B; insert into t2 select 20, 20070311, 1, 'filler' from t1 A, t1 B;
#insert into t2 select 20, 20070411, 1, 'filler' from t1 A, t1 B; insert into t2 select 20, 20070411, 1, 'filler' from t1 A, t1 B;
#insert into t2 values(52, 20070321, 123, 'filler') ; insert into t2 values(52, 20070321, 123, 'filler') ;
#insert into t2 values(52, 20070322, 456, 'filler') ; insert into t2 values(52, 20070322, 456, 'filler') ;
#select sum(count) from t2 ch where ch.defid in (50,52) and ch.day between 20070320 and 20070401 group by defid; select sum(count) from t2 ch where ch.defid in (50,52) and ch.day between 20070320 and 20070401 group by defid;
#drop table t1, t2; drop table t1, t2;
...@@ -3679,10 +3679,12 @@ int ha_partition::index_read_map(uchar *buf, const uchar *key, ...@@ -3679,10 +3679,12 @@ int ha_partition::index_read_map(uchar *buf, const uchar *key,
enum ha_rkey_function find_flag) enum ha_rkey_function find_flag)
{ {
DBUG_ENTER("ha_partition::index_read_map"); DBUG_ENTER("ha_partition::index_read_map");
end_range= 0; end_range= 0;
m_index_scan_type= partition_index_read; m_index_scan_type= partition_index_read;
DBUG_RETURN(common_index_read(buf, key, keypart_map, find_flag)); m_start_key.key= key;
m_start_key.keypart_map= keypart_map;
m_start_key.flag= find_flag;
DBUG_RETURN(common_index_read(buf, TRUE));
} }
...@@ -3690,41 +3692,63 @@ int ha_partition::index_read_map(uchar *buf, const uchar *key, ...@@ -3690,41 +3692,63 @@ int ha_partition::index_read_map(uchar *buf, const uchar *key,
Common routine for a number of index_read variants Common routine for a number of index_read variants
SYNOPSIS SYNOPSIS
common_index_read ha_partition::common_index_read()
buf Buffer where the record should be returned
see index_read for rest have_start_key TRUE <=> the left endpoint is available, i.e.
we're in index_read call or in read_range_first
call and the range has left endpoint
FALSE <=> there is no left endpoint (we're in
read_range_first() call and the range has no left
endpoint)
DESCRIPTION
Start scanning the range (when invoked from read_range_first()) or doing
an index lookup (when invoked from index_read_XXX):
- If possible, perform partition selection
- Find the set of partitions we're going to use
- Depending on whether we need ordering:
NO: Get the first record from first used partition (see
handle_unordered_scan_next_partition)
YES: Fill the priority queue and get the record that is the first in
the ordering
RETURN
0 OK
other HA_ERR_END_OF_FILE or other error code.
*/ */
int ha_partition::common_index_read(uchar *buf, const uchar *key, int ha_partition::common_index_read(uchar *buf, bool have_start_key)
key_part_map keypart_map,
enum ha_rkey_function find_flag)
{ {
int error; int error;
uint key_len;
bool reverse_order= FALSE; bool reverse_order= FALSE;
uint key_len= calculate_key_len(table, active_index, key, keypart_map);
DBUG_ENTER("ha_partition::common_index_read"); DBUG_ENTER("ha_partition::common_index_read");
LINT_INIT(key_len); /* used if have_start_key==TRUE */
memcpy((void*)m_start_key.key, key, key_len); if (have_start_key)
m_start_key.keypart_map= keypart_map; {
m_start_key.length= key_len; m_start_key.length= key_len= calculate_key_len(table, active_index,
m_start_key.flag= find_flag; m_start_key.key,
m_start_key.keypart_map);
if ((error= partition_scan_set_up(buf, TRUE))) }
if ((error= partition_scan_set_up(buf, have_start_key)))
{ {
DBUG_RETURN(error); DBUG_RETURN(error);
} }
if (find_flag == HA_READ_PREFIX_LAST ||
find_flag == HA_READ_PREFIX_LAST_OR_PREV || if (have_start_key &&
find_flag == HA_READ_BEFORE_KEY) (m_start_key.flag == HA_READ_PREFIX_LAST ||
m_start_key.flag == HA_READ_PREFIX_LAST_OR_PREV ||
m_start_key.flag == HA_READ_BEFORE_KEY))
{ {
reverse_order= TRUE; reverse_order= TRUE;
m_ordered_scan_ongoing= TRUE; m_ordered_scan_ongoing= TRUE;
} }
if (!m_ordered_scan_ongoing || if (!m_ordered_scan_ongoing ||
(find_flag == HA_READ_KEY_EXACT && (have_start_key && m_start_key.flag == HA_READ_KEY_EXACT &&
(key_len >= m_curr_key_info->key_length || (key_len >= m_curr_key_info->key_length || key_len == 0)))
key_len == 0))) {
{
/* /*
We use unordered index scan either when read_range is used and flag We use unordered index scan either when read_range is used and flag
is set to not use ordered or when an exact key is used and in this is set to not use ordered or when an exact key is used and in this
...@@ -3815,7 +3839,7 @@ int ha_partition::index_last(uchar * buf) ...@@ -3815,7 +3839,7 @@ int ha_partition::index_last(uchar * buf)
Common routine for index_first/index_last Common routine for index_first/index_last
SYNOPSIS SYNOPSIS
common_index_first_last ha_partition::common_first_last()
see index_first for rest see index_first for rest
*/ */
...@@ -3859,7 +3883,10 @@ int ha_partition::index_read_last_map(uchar *buf, const uchar *key, ...@@ -3859,7 +3883,10 @@ int ha_partition::index_read_last_map(uchar *buf, const uchar *key,
m_ordered= TRUE; // Safety measure m_ordered= TRUE; // Safety measure
end_range= 0; end_range= 0;
m_index_scan_type= partition_index_read_last; m_index_scan_type= partition_index_read_last;
DBUG_RETURN(common_index_read(buf, key, keypart_map, HA_READ_PREFIX_LAST)); m_start_key.key= key;
m_start_key.keypart_map= keypart_map;
m_start_key.flag= HA_READ_PREFIX_LAST;
DBUG_RETURN(common_index_read(buf, TRUE));
} }
...@@ -3990,23 +4017,15 @@ int ha_partition::read_range_first(const key_range *start_key, ...@@ -3990,23 +4017,15 @@ int ha_partition::read_range_first(const key_range *start_key,
((end_key->flag == HA_READ_BEFORE_KEY) ? 1 : ((end_key->flag == HA_READ_BEFORE_KEY) ? 1 :
(end_key->flag == HA_READ_AFTER_KEY) ? -1 : 0); (end_key->flag == HA_READ_AFTER_KEY) ? -1 : 0);
} }
range_key_part= m_curr_key_info->key_part;
if (!start_key) // Read first record range_key_part= m_curr_key_info->key_part;
{ if (start_key)
if (m_ordered) m_start_key= *start_key;
m_index_scan_type= partition_index_first;
else
m_index_scan_type= partition_index_first_unordered;
error= common_first_last(m_rec0);
}
else else
{ m_start_key.key= NULL;
m_index_scan_type= partition_index_read;
error= common_index_read(m_rec0, m_index_scan_type= partition_read_range;
start_key->key, error= common_index_read(m_rec0, test(start_key));
start_key->keypart_map, start_key->flag);
}
DBUG_RETURN(error); DBUG_RETURN(error);
} }
...@@ -4028,26 +4047,36 @@ int ha_partition::read_range_next() ...@@ -4028,26 +4047,36 @@ int ha_partition::read_range_next()
if (m_ordered) if (m_ordered)
{ {
DBUG_RETURN(handler::read_range_next()); DBUG_RETURN(handle_ordered_next(table->record[0], eq_range));
} }
DBUG_RETURN(handle_unordered_next(m_rec0, eq_range)); DBUG_RETURN(handle_unordered_next(table->record[0], eq_range));
} }
/* /*
Common routine to set up scans Common routine to set up index scans
SYNOPSIS SYNOPSIS
buf Buffer to later return record in ha_partition::partition_scan_set_up()
idx_read_flag Is it index scan buf Buffer to later return record in (this function
needs it to calculcate partitioning function
values)
idx_read_flag TRUE <=> m_start_key has range start endpoint which
probably can be used to determine the set of partitions
to scan.
FALSE <=> there is no start endpoint.
DESCRIPTION
Find out which partitions we'll need to read when scanning the specified
range.
If we need to scan only one partition, set m_ordered_scan_ongoing=FALSE
as we will not need to do merge ordering.
RETURN VALUE RETURN VALUE
>0 Error code >0 Error code
0 Success 0 Success
DESCRIPTION
This is where we check which partitions to actually scan if not all
of them
*/ */
int ha_partition::partition_scan_set_up(uchar * buf, bool idx_read_flag) int ha_partition::partition_scan_set_up(uchar * buf, bool idx_read_flag)
...@@ -4138,10 +4167,19 @@ int ha_partition::handle_unordered_next(uchar *buf, bool is_next_same) ...@@ -4138,10 +4167,19 @@ int ha_partition::handle_unordered_next(uchar *buf, bool is_next_same)
DBUG_ENTER("ha_partition::handle_unordered_next"); DBUG_ENTER("ha_partition::handle_unordered_next");
/* /*
We should consider if this should be split into two functions as We should consider if this should be split into three functions as
next_same is alwas a local constant partition_read_range is_next_same are always local constants
*/ */
if (is_next_same)
if (m_index_scan_type == partition_read_range)
{
if (!(error= file->read_range_next()))
{
m_last_part= m_part_spec.start_part;
DBUG_RETURN(0);
}
}
else if (is_next_same)
{ {
if (!(error= file->index_next_same(buf, m_start_key.key, if (!(error= file->index_next_same(buf, m_start_key.key,
m_start_key.length))) m_start_key.length)))
...@@ -4150,15 +4188,13 @@ int ha_partition::handle_unordered_next(uchar *buf, bool is_next_same) ...@@ -4150,15 +4188,13 @@ int ha_partition::handle_unordered_next(uchar *buf, bool is_next_same)
DBUG_RETURN(0); DBUG_RETURN(0);
} }
} }
else if (!(error= file->index_next(buf))) else
{ {
if (!(file->index_flags(active_index, 0, 1) & HA_READ_ORDER) || if (!(error= file->index_next(buf)))
compare_key(end_range) <= 0)
{ {
m_last_part= m_part_spec.start_part; m_last_part= m_part_spec.start_part;
DBUG_RETURN(0); // Row was in range DBUG_RETURN(0); // Row was in range
} }
error= HA_ERR_END_OF_FILE;
} }
if (error == HA_ERR_END_OF_FILE) if (error == HA_ERR_END_OF_FILE)
...@@ -4202,6 +4238,11 @@ int ha_partition::handle_unordered_scan_next_partition(uchar * buf) ...@@ -4202,6 +4238,11 @@ int ha_partition::handle_unordered_scan_next_partition(uchar * buf)
file= m_file[i]; file= m_file[i];
m_part_spec.start_part= i; m_part_spec.start_part= i;
switch (m_index_scan_type) { switch (m_index_scan_type) {
case partition_read_range:
DBUG_PRINT("info", ("read_range_first on partition %d", i));
error= file->read_range_first(m_start_key.key? &m_start_key: NULL,
end_range, eq_range, FALSE);
break;
case partition_index_read: case partition_index_read:
DBUG_PRINT("info", ("index_read on partition %d", i)); DBUG_PRINT("info", ("index_read on partition %d", i));
error= file->index_read_map(buf, m_start_key.key, error= file->index_read_map(buf, m_start_key.key,
...@@ -4230,13 +4271,8 @@ int ha_partition::handle_unordered_scan_next_partition(uchar * buf) ...@@ -4230,13 +4271,8 @@ int ha_partition::handle_unordered_scan_next_partition(uchar * buf)
} }
if (!error) if (!error)
{ {
if (!(file->index_flags(active_index, 0, 1) & HA_READ_ORDER) || m_last_part= i;
compare_key(end_range) <= 0) DBUG_RETURN(0);
{
m_last_part= i;
DBUG_RETURN(0);
}
error= HA_ERR_END_OF_FILE;
} }
if ((error != HA_ERR_END_OF_FILE) && (error != HA_ERR_KEY_NOT_FOUND)) if ((error != HA_ERR_END_OF_FILE) && (error != HA_ERR_KEY_NOT_FOUND))
DBUG_RETURN(error); DBUG_RETURN(error);
...@@ -4315,6 +4351,17 @@ int ha_partition::handle_ordered_index_scan(uchar *buf, bool reverse_order) ...@@ -4315,6 +4351,17 @@ int ha_partition::handle_ordered_index_scan(uchar *buf, bool reverse_order)
m_start_key.keypart_map); m_start_key.keypart_map);
reverse_order= TRUE; reverse_order= TRUE;
break; break;
case partition_read_range:
{
/*
This can only read record to table->record[0], as it was set when
the table was being opened. We have to memcpy data ourselves.
*/
error= file->read_range_first(&m_start_key, end_range, eq_range, TRUE);
memcpy(rec_buf_ptr, table->record[0], m_rec_length);
reverse_order= FALSE;
break;
}
default: default:
DBUG_ASSERT(FALSE); DBUG_ASSERT(FALSE);
DBUG_RETURN(HA_ERR_END_OF_FILE); DBUG_RETURN(HA_ERR_END_OF_FILE);
...@@ -4395,8 +4442,13 @@ int ha_partition::handle_ordered_next(uchar *buf, bool is_next_same) ...@@ -4395,8 +4442,13 @@ int ha_partition::handle_ordered_next(uchar *buf, bool is_next_same)
uint part_id= m_top_entry; uint part_id= m_top_entry;
handler *file= m_file[part_id]; handler *file= m_file[part_id];
DBUG_ENTER("ha_partition::handle_ordered_next"); DBUG_ENTER("ha_partition::handle_ordered_next");
if (!is_next_same) if (m_index_scan_type == partition_read_range)
{
error= file->read_range_next();
memcpy(rec_buf(part_id), table->record[0], m_rec_length);
}
else if (!is_next_same)
error= file->index_next(rec_buf(part_id)); error= file->index_next(rec_buf(part_id));
else else
error= file->index_next_same(rec_buf(part_id), m_start_key.key, error= file->index_next_same(rec_buf(part_id), m_start_key.key,
......
...@@ -49,7 +49,8 @@ private: ...@@ -49,7 +49,8 @@ private:
partition_index_first_unordered= 2, partition_index_first_unordered= 2,
partition_index_last= 3, partition_index_last= 3,
partition_index_read_last= 4, partition_index_read_last= 4,
partition_no_index_scan= 5 partition_read_range = 5,
partition_no_index_scan= 6
}; };
/* Data for the partition handler */ /* Data for the partition handler */
int m_mode; // Open mode int m_mode; // Open mode
...@@ -63,8 +64,6 @@ private: ...@@ -63,8 +64,6 @@ private:
handler **m_reorged_file; // Reorganised partitions handler **m_reorged_file; // Reorganised partitions
handler **m_added_file; // Added parts kept for errors handler **m_added_file; // Added parts kept for errors
partition_info *m_part_info; // local reference to partition partition_info *m_part_info; // local reference to partition
uchar *m_start_key_ref; // Reference of start key in current
// index scan info
Field **m_part_field_array; // Part field array locally to save acc Field **m_part_field_array; // Part field array locally to save acc
uchar *m_ordered_rec_buffer; // Row and key buffer for ord. idx scan uchar *m_ordered_rec_buffer; // Row and key buffer for ord. idx scan
KEY *m_curr_key_info; // Current index KEY *m_curr_key_info; // Current index
...@@ -429,9 +428,7 @@ public: ...@@ -429,9 +428,7 @@ public:
virtual int read_range_next(); virtual int read_range_next();
private: private:
int common_index_read(uchar * buf, const uchar * key, int common_index_read(uchar * buf, bool have_start_key);
key_part_map keypart_map,
enum ha_rkey_function find_flag);
int common_first_last(uchar * buf); int common_first_last(uchar * buf);
int partition_scan_set_up(uchar * buf, bool idx_read_flag); int partition_scan_set_up(uchar * buf, bool idx_read_flag);
int handle_unordered_next(uchar * buf, bool next_same); int handle_unordered_next(uchar * buf, bool next_same);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment