Commit 476b24d0 authored by Monty's avatar Monty

MDEV-20057 Distinct SUM on CROSS JOIN and grouped returns wrong result

SELECT DISTINCT did not work with expressions with sum functions.
Distinct was only done on the values stored in the intermediate temporary
tables, which only stored the value of each sum function.

In other words:
SELECT DISTINCT sum(a),sum(b),avg(c) ... worked.
SELECT DISTINCT sum(a),sum(b) > 2,sum(c)+sum(d) would not work.

The later query would do ONLY apply distinct on the sum(a) part.

Reviewer: Sergei Petrunia <sergey@mariadb.com>


This was fixed by extending remove_dup_with_hash_index() and
remove_dup_with_compare() to take into account the columns in the result
list that where not stored in the temporary table.

Note that in many cases the above dup removal functions are not used as
the optimizer may be able to either remove duplicates early or it will
discover that duplicate remove is not needed. The later happens for
example if the group by fields is part of the result.

Other things:
- Backported from 11.0 the change of Sort_param.tmp_buffer from char* to
  String.
- Changed Type_handler::make_sort_key() to take String as a parameter
  instead of Sort_param. This was done to allow make_sort_key() functions
  to be reused by distinct elimination functions.
  This makes Type_handler_string_result::make_sort_key() similar to code
  in 11.0
- Simplied error handling in remove_dup_with_compare() to remove code
  duplication.
parent bd0d7ea5
......@@ -1070,3 +1070,40 @@ UNION
1
drop table t1;
End of 5.5 tests
#
# MDEV-20057 Distinct SUM on CROSS JOIN and grouped returns wrong result
#
create table t1 (c int, d int);
insert into t1 values (5, 1), (0, 3);
select distinct sum(distinct 1), sum(t1.d) > 2 from (t1 e join t1) group by t1.c;
sum(distinct 1) sum(t1.d) > 2
1 1
1 0
select distinct sum(distinct 1), sum(t1.d) > 2, t1.c from (t1 e join t1) group by t1.c;
sum(distinct 1) sum(t1.d) > 2 c
1 1 0
1 0 5
insert into t1 values (6,6);
select distinct sum(distinct 1), sum(t1.d) > 5 from (t1 e join t1) group by t1.c;
sum(distinct 1) sum(t1.d) > 5
1 1
1 0
select distinct sum(distinct 1), sum(t1.d) > 5, t1.c from (t1 e join t1) group by t1.c;
sum(distinct 1) sum(t1.d) > 5 c
1 1 0
1 0 5
1 1 6
set @@sort_buffer_size=1024;
insert into t1 select -seq,-seq from seq_1_to_100;
select distinct sum(distinct 1), sum(t1.d) > 2, length(group_concat(t1.d)) > 1000 from (t1 e join t1) group by t1.c having t1.c > -2 ;
sum(distinct 1) sum(t1.d) > 2 length(group_concat(t1.d)) > 1000
1 0 0
1 1 0
select distinct sum(distinct 1), sum(t1.d) > 2, length(group_concat(t1.d)) > 1000,t1.c from (t1 e join t1) group by t1.c having t1.c > -2;
sum(distinct 1) sum(t1.d) > 2 length(group_concat(t1.d)) > 1000 c
1 0 0 -1
1 1 0 0
1 1 0 5
1 1 0 6
drop table t1;
# End of 10.4 tests
......@@ -4,6 +4,7 @@
#
--source include/default_optimizer_switch.inc
--source include/have_sequence.inc
--disable_warnings
drop table if exists t1,t2,t3;
--enable_warnings
......@@ -818,3 +819,25 @@ UNION
drop table t1;
--echo End of 5.5 tests
--echo #
--echo # MDEV-20057 Distinct SUM on CROSS JOIN and grouped returns wrong result
--echo #
create table t1 (c int, d int);
insert into t1 values (5, 1), (0, 3);
select distinct sum(distinct 1), sum(t1.d) > 2 from (t1 e join t1) group by t1.c;
select distinct sum(distinct 1), sum(t1.d) > 2, t1.c from (t1 e join t1) group by t1.c;
insert into t1 values (6,6);
select distinct sum(distinct 1), sum(t1.d) > 5 from (t1 e join t1) group by t1.c;
select distinct sum(distinct 1), sum(t1.d) > 5, t1.c from (t1 e join t1) group by t1.c;
# Force usage of remove_dup_with_compare() algorithm
set @@sort_buffer_size=1024;
insert into t1 select -seq,-seq from seq_1_to_100;
select distinct sum(distinct 1), sum(t1.d) > 2, length(group_concat(t1.d)) > 1000 from (t1 e join t1) group by t1.c having t1.c > -2 ;
select distinct sum(distinct 1), sum(t1.d) > 2, length(group_concat(t1.d)) > 1000,t1.c from (t1 e join t1) group by t1.c having t1.c > -2;
drop table t1;
--echo # End of 10.4 tests
......@@ -159,7 +159,7 @@ SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort,
MYSQL_FILESORT_START(table->s->db.str, table->s->table_name.str);
DEBUG_SYNC(thd, "filesort_start");
if (!(sort= new SORT_INFO))
if (!(sort= new SORT_INFO)) // Note that this is not automatically freed!
return 0;
if (subselect && subselect->filesort_buffer.is_allocated())
......@@ -186,10 +186,6 @@ SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort,
sort->addon_buf= param.addon_buf;
sort->addon_field= param.addon_field;
sort->unpack= unpack_addon_fields;
if (multi_byte_charset &&
!(param.tmp_buffer= (char*) my_malloc(param.sort_length,
MYF(MY_WME | MY_THREAD_SPECIFIC))))
goto err;
if (select && select->quick)
thd->inc_status_sort_range();
......@@ -254,6 +250,9 @@ SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort,
tracker->report_sort_buffer_size(sort->sort_buffer_size());
}
if (param.tmp_buffer.alloc(param.sort_length))
goto err;
if (open_cached_file(&buffpek_pointers,mysql_tmpdir,TEMP_PREFIX,
DISK_BUFFER_SIZE, MYF(MY_WME)))
goto err;
......@@ -337,7 +336,7 @@ SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort,
error= 0;
err:
my_free(param.tmp_buffer);
param.tmp_buffer.free();
if (!subselect || !subselect->is_uncacheable())
{
sort->free_sort_buffer();
......@@ -977,17 +976,15 @@ static inline void store_length(uchar *to, uint length, uint pack_length)
void
Type_handler_string_result::make_sort_key(uchar *to, Item *item,
const SORT_FIELD_ATTR *sort_field,
Sort_param *param) const
String *tmp_buffer) const
{
CHARSET_INFO *cs= item->collation.collation;
bool maybe_null= item->maybe_null;
if (maybe_null)
*to++= 1;
char *tmp_buffer= param->tmp_buffer ? param->tmp_buffer : (char*) to;
String tmp(tmp_buffer, param->tmp_buffer ? param->sort_length :
sort_field->length, cs);
String *res= item->str_result(&tmp);
Binary_string *res= item->str_result(tmp_buffer);
if (!res)
{
if (maybe_null)
......@@ -1015,11 +1012,11 @@ Type_handler_string_result::make_sort_key(uchar *to, Item *item,
size_t tmp_length=
#endif
cs->coll->strnxfrm(cs, to, sort_field->length,
item->max_char_length() *
cs->strxfrm_multiply,
(uchar*) res->ptr(), res->length(),
MY_STRXFRM_PAD_WITH_SPACE |
MY_STRXFRM_PAD_TO_MAXLEN);
item->max_char_length() *
cs->strxfrm_multiply,
(uchar*) res->ptr(), res->length(),
MY_STRXFRM_PAD_WITH_SPACE |
MY_STRXFRM_PAD_TO_MAXLEN);
DBUG_ASSERT(tmp_length == sort_field->length);
}
else
......@@ -1050,7 +1047,7 @@ Type_handler_string_result::make_sort_key(uchar *to, Item *item,
void
Type_handler_int_result::make_sort_key(uchar *to, Item *item,
const SORT_FIELD_ATTR *sort_field,
Sort_param *param) const
String *tmp_buffer) const
{
longlong value= item->val_int_result();
make_sort_key_longlong(to, item->maybe_null, item->null_value,
......@@ -1061,7 +1058,7 @@ Type_handler_int_result::make_sort_key(uchar *to, Item *item,
void
Type_handler_temporal_result::make_sort_key(uchar *to, Item *item,
const SORT_FIELD_ATTR *sort_field,
Sort_param *param) const
String *tmp_buffer) const
{
MYSQL_TIME buf;
// This is a temporal type. No nanoseconds. Rounding mode is not important.
......@@ -1083,7 +1080,7 @@ Type_handler_temporal_result::make_sort_key(uchar *to, Item *item,
void
Type_handler_timestamp_common::make_sort_key(uchar *to, Item *item,
const SORT_FIELD_ATTR *sort_field,
Sort_param *param) const
String *tmp_buffer) const
{
THD *thd= current_thd;
uint binlen= my_timestamp_binary_length(item->decimals);
......@@ -1147,7 +1144,7 @@ Type_handler::make_sort_key_longlong(uchar *to,
void
Type_handler_decimal_result::make_sort_key(uchar *to, Item *item,
const SORT_FIELD_ATTR *sort_field,
Sort_param *param) const
String *tmp_buffer) const
{
my_decimal dec_buf, *dec_val= item->val_decimal_result(&dec_buf);
if (item->maybe_null)
......@@ -1167,7 +1164,7 @@ Type_handler_decimal_result::make_sort_key(uchar *to, Item *item,
void
Type_handler_real_result::make_sort_key(uchar *to, Item *item,
const SORT_FIELD_ATTR *sort_field,
Sort_param *param) const
String *tmp_buffer) const
{
double value= item->val_result();
if (item->maybe_null)
......@@ -1205,7 +1202,8 @@ static void make_sortkey(Sort_param *param, uchar *to, uchar *ref_pos)
else
{ // Item
sort_field->item->type_handler()->make_sort_key(to, sort_field->item,
sort_field, param);
sort_field,
&param->tmp_buffer);
if ((maybe_null= sort_field->item->maybe_null))
to++;
}
......
This diff is collapsed.
......@@ -19,6 +19,7 @@
#include "my_base.h" /* ha_rows */
#include <my_sys.h> /* qsort2_cmp */
#include "queues.h"
#include "sql_string.h"
typedef struct st_buffpek BUFFPEK;
......@@ -82,14 +83,20 @@ class Sort_param {
uchar *unique_buff;
bool not_killable;
char* tmp_buffer;
String tmp_buffer;
// The fields below are used only by Unique class.
qsort2_cmp compare;
BUFFPEK_COMPARE_CONTEXT cmp_context;
Sort_param()
{
memset(this, 0, sizeof(*this));
memset(reinterpret_cast<void*>(this), 0, sizeof(*this));
tmp_buffer.set_thread_specific();
/*
Fix memset() clearing the charset.
TODO: The constructor should be eventually rewritten not to use memset().
*/
tmp_buffer.set_charset(&my_charset_bin);
}
void init_for_filesort(uint sortlen, TABLE *table,
ha_rows maxrows, bool sort_positions);
......
......@@ -3734,7 +3734,7 @@ class Type_handler
virtual void make_sort_key(uchar *to, Item *item,
const SORT_FIELD_ATTR *sort_field,
Sort_param *param) const= 0;
String *tmp) const= 0;
virtual void sortlength(THD *thd,
const Type_std_attributes *item,
SORT_FIELD_ATTR *attr) const= 0;
......@@ -4120,7 +4120,7 @@ class Type_handler_row: public Type_handler
const Bit_addr &bit,
const Column_definition_attributes *attr,
uint32 flags) const override;
void make_sort_key(uchar *, Item *, const SORT_FIELD_ATTR *, Sort_param *)
void make_sort_key(uchar *, Item *, const SORT_FIELD_ATTR *, String *tmp)
const override
{
MY_ASSERT_UNREACHABLE();
......@@ -4431,7 +4431,7 @@ class Type_handler_real_result: public Type_handler_numeric
const Item *outer,
bool is_in_predicate) const;
void make_sort_key(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field,
Sort_param *param) const;
String *tmp) const;
void sortlength(THD *thd,
const Type_std_attributes *item,
SORT_FIELD_ATTR *attr) const;
......@@ -4519,7 +4519,7 @@ class Type_handler_decimal_result: public Type_handler_numeric
bool is_in_predicate) const;
Field *make_num_distinct_aggregator_field(MEM_ROOT *, const Item *) const;
void make_sort_key(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field,
Sort_param *param) const;
String *tmp) const;
void sortlength(THD *thd,
const Type_std_attributes *item,
SORT_FIELD_ATTR *attr) const;
......@@ -4745,7 +4745,7 @@ class Type_handler_int_result: public Type_handler_numeric
bool is_in_predicate) const;
Field *make_num_distinct_aggregator_field(MEM_ROOT *, const Item *) const;
void make_sort_key(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field,
Sort_param *param) const;
String *tmp) const;
void sortlength(THD *thd,
const Type_std_attributes *item,
SORT_FIELD_ATTR *attr) const;
......@@ -4834,7 +4834,7 @@ class Type_handler_temporal_result: public Type_handler
Item_result cmp_type() const { return TIME_RESULT; }
virtual ~Type_handler_temporal_result() = default;
void make_sort_key(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field,
Sort_param *param) const;
String *tmp) const;
void sortlength(THD *thd,
const Type_std_attributes *item,
SORT_FIELD_ATTR *attr) const;
......@@ -4921,7 +4921,7 @@ class Type_handler_string_result: public Type_handler
type_handler_adjusted_to_max_octet_length(uint max_octet_length,
CHARSET_INFO *cs) const;
void make_sort_key(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field,
Sort_param *param) const;
String *tmp) const;
void sortlength(THD *thd,
const Type_std_attributes *item,
SORT_FIELD_ATTR *attr) const;
......@@ -5953,7 +5953,7 @@ class Type_handler_timestamp_common: public Type_handler_temporal_with_date
cmp_item *make_cmp_item(THD *thd, CHARSET_INFO *cs) const;
in_vector *make_in_vector(THD *thd, const Item_func_in *f, uint nargs) const;
void make_sort_key(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field,
Sort_param *param) const;
String *tmp) const;
void sortlength(THD *thd,
const Type_std_attributes *item,
SORT_FIELD_ATTR *attr) const;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment