Commit 90780bb5 authored by Igor Babaev's avatar Igor Babaev

MDEV-21104 Wrong result (extra rows and wrong values) with incremental BNLH

This bug could affect multi-way join queries with embedded outer joins that
contained a conjunctive IS NULL predicate over a non-nullable column from
inner table of an outer join. The predicate could occur in WHERE condition
or in ON condition. Due to this bug a wrong result set could be returned by
the query. The bug manifested itself only when join buffers were employed
for join operations.

The problem appeared because
- a bug in the function JOIN_CACHE::get_match_flag_by_pos that not always
  returned proper match flags for embedding outer joins stored together
  with table rows put a join buffer.
- bug in the function JOIN_CACHE::join_matching_records that not always
  correctly determined that a row from the buffer could be skipped due
  to applied 'not_exists' optimization.
Example:
  SELECT * FROM t1 LEFT JOIN ((t2 LEFT JOIN t3 ON c = d) JOIN t4) ON b = e
    WHERE e IS NULL;

The patch introduces a new function that finds the match flag for a record
from join buffer specifying the buffer where this flag has to be found.
The function is called JOIN_CACHE::get_match_flag_by_pos_from_join_buffer().
Now this function rather than JOIN_CACHE::get_match_flag_by_pos() is used
in JOIN_CACHE::skip_if_matched() to check whether a record from the join
buffer must be ignored when extending the record by null complements.
Also the code of the function JOIN_CACHE::skip_if_not_needed_match() has
been changed. The function checks whether a record from the join buffer
still may produce some useful extensions.
Also some clarifying comments has been added.

Approved by monty@mariadb.com.
parent 1af85581
...@@ -6054,4 +6054,57 @@ select f2 from t2,t1 where f2 = 0; ...@@ -6054,4 +6054,57 @@ select f2 from t2,t1 where f2 = 0;
f2 f2
drop table t1, t2; drop table t1, t2;
set join_buffer_size=@save_join_buffer_size; set join_buffer_size=@save_join_buffer_size;
#
# MDEV-21104: BNLH used for multi-join query with embedded outer join
# and possible 'not exists' optimization
#
set join_cache_level=4;
CREATE TABLE t1 (a int) ENGINE=MyISAM;
INSERT INTO t1 VALUES (1),(2);
CREATE TABLE t2 (b int, c int) ENGINE=MyISAM;
INSERT INTO t2 VALUES (1,2),(2,4);
CREATE TABLE t3 (d int, KEY(d)) ENGINE=MyISAM;
INSERT INTO t3 VALUES (1),(2);
CREATE TABLE t4 (e int primary key) ENGINE=MyISAM;
INSERT INTO t4 VALUES (1),(2);
ANALYZE TABLE t1,t2,t3,t4;
Table Op Msg_type Msg_text
test.t1 analyze status OK
test.t2 analyze status OK
test.t3 analyze status OK
test.t4 analyze status OK
SELECT * FROM t2 LEFT JOIN t3 ON c = d;
b c d
1 2 2
2 4 NULL
SELECT * FROM (t2 LEFT JOIN t3 ON c = d ) JOIN t4;
b c d e
1 2 2 1
2 4 NULL 1
1 2 2 2
2 4 NULL 2
EXPLAIN SELECT * FROM t1 LEFT JOIN ( ( t2 LEFT JOIN t3 ON c = d ) JOIN t4 ) ON b = e;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 2
1 SIMPLE t2 ALL NULL NULL NULL NULL 2 Using where; Using join buffer (flat, BNL join)
1 SIMPLE t3 hash_index d #hash#d:d 5:5 test.t2.c 2 Using where; Using index; Using join buffer (incremental, BNLH join)
1 SIMPLE t4 hash_index PRIMARY #hash#PRIMARY:PRIMARY 4:4 test.t2.b 2 Using index; Using join buffer (incremental, BNLH join)
SELECT * FROM t1 LEFT JOIN ( ( t2 LEFT JOIN t3 ON c = d ) JOIN t4 ) ON b = e;
a b c d e
1 1 2 2 1
2 1 2 2 1
1 2 4 NULL 2
2 2 4 NULL 2
EXPLAIN SELECT * FROM t1 LEFT JOIN ( ( t2 LEFT JOIN t3 ON c = d ) JOIN t4 ) ON b = e
WHERE e IS NULL;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 2
1 SIMPLE t2 ALL NULL NULL NULL NULL 2 Using where; Using join buffer (flat, BNL join)
1 SIMPLE t3 hash_index d #hash#d:d 5:5 test.t2.c 2 Using where; Using index; Using join buffer (incremental, BNLH join)
1 SIMPLE t4 hash_index PRIMARY #hash#PRIMARY:PRIMARY 4:4 test.t2.b 2 Using where; Using index; Not exists; Using join buffer (incremental, BNLH join)
SELECT * FROM t1 LEFT JOIN ( ( t2 LEFT JOIN t3 ON c = d ) JOIN t4 ) ON b = e
WHERE e IS NULL;
a b c d e
DROP TABLE t1,t2,t3,t4;
set join_cache_level=@save_join_cache_level;
set @@optimizer_switch=@save_optimizer_switch; set @@optimizer_switch=@save_optimizer_switch;
...@@ -4014,5 +4014,41 @@ select f2 from t2,t1 where f2 = 0; ...@@ -4014,5 +4014,41 @@ select f2 from t2,t1 where f2 = 0;
drop table t1, t2; drop table t1, t2;
set join_buffer_size=@save_join_buffer_size; set join_buffer_size=@save_join_buffer_size;
--echo #
--echo # MDEV-21104: BNLH used for multi-join query with embedded outer join
--echo # and possible 'not exists' optimization
--echo #
set join_cache_level=4;
CREATE TABLE t1 (a int) ENGINE=MyISAM;
INSERT INTO t1 VALUES (1),(2);
CREATE TABLE t2 (b int, c int) ENGINE=MyISAM;
INSERT INTO t2 VALUES (1,2),(2,4);
CREATE TABLE t3 (d int, KEY(d)) ENGINE=MyISAM;
INSERT INTO t3 VALUES (1),(2);
CREATE TABLE t4 (e int primary key) ENGINE=MyISAM;
INSERT INTO t4 VALUES (1),(2);
ANALYZE TABLE t1,t2,t3,t4;
SELECT * FROM t2 LEFT JOIN t3 ON c = d;
SELECT * FROM (t2 LEFT JOIN t3 ON c = d ) JOIN t4;
let $q1=
SELECT * FROM t1 LEFT JOIN ( ( t2 LEFT JOIN t3 ON c = d ) JOIN t4 ) ON b = e;
eval EXPLAIN $q1;
eval $q1;
let $q2=
SELECT * FROM t1 LEFT JOIN ( ( t2 LEFT JOIN t3 ON c = d ) JOIN t4 ) ON b = e
WHERE e IS NULL;
eval EXPLAIN $q2;
eval $q2;
DROP TABLE t1,t2,t3,t4;
set join_cache_level=@save_join_cache_level;
# The following command must be the last one in the file # The following command must be the last one in the file
set @@optimizer_switch=@save_optimizer_switch; set @@optimizer_switch=@save_optimizer_switch;
...@@ -1661,6 +1661,7 @@ void JOIN_CACHE::get_record_by_pos(uchar *rec_ptr) ...@@ -1661,6 +1661,7 @@ void JOIN_CACHE::get_record_by_pos(uchar *rec_ptr)
get the match flag for the record pointed by the reference at the position get the match flag for the record pointed by the reference at the position
rec_ptr. If the match flag is placed in one of the previous buffers the rec_ptr. If the match flag is placed in one of the previous buffers the
function first reaches the linked record fields in this buffer. function first reaches the linked record fields in this buffer.
The function returns the value of the first encountered match flag.
RETURN VALUE RETURN VALUE
match flag for the record at the position rec_ptr match flag for the record at the position rec_ptr
...@@ -1684,6 +1685,39 @@ enum JOIN_CACHE::Match_flag JOIN_CACHE::get_match_flag_by_pos(uchar *rec_ptr) ...@@ -1684,6 +1685,39 @@ enum JOIN_CACHE::Match_flag JOIN_CACHE::get_match_flag_by_pos(uchar *rec_ptr)
} }
/*
Get the match flag for the referenced record from specified join buffer
SYNOPSIS
get_match_flag_by_pos_from_join_buffer()
rec_ptr position of the first field of the record in the join buffer
tab join table with join buffer where to look for the match flag
DESCRIPTION
This default implementation of the get_match_flag_by_pos_from_join_buffer
method gets the match flag for the record pointed by the reference at the
position rec_ptr from the join buffer attached to the join table tab.
RETURN VALUE
match flag for the record at the position rec_ptr from the join
buffer attached to the table tab.
*/
enum JOIN_CACHE::Match_flag
JOIN_CACHE::get_match_flag_by_pos_from_join_buffer(uchar *rec_ptr,
JOIN_TAB *tab)
{
DBUG_ASSERT(tab->cache && tab->cache->with_match_flag);
for (JOIN_CACHE *cache= this; ; )
{
if (cache->join_tab == tab)
return (enum Match_flag) rec_ptr[0];
cache= cache->prev_cache;
rec_ptr= cache->get_rec_ref(rec_ptr);
}
}
/* /*
Calculate the increment of the auxiliary buffer for a record write Calculate the increment of the auxiliary buffer for a record write
...@@ -1954,6 +1988,10 @@ bool JOIN_CACHE::read_referenced_field(CACHE_FIELD *copy, ...@@ -1954,6 +1988,10 @@ bool JOIN_CACHE::read_referenced_field(CACHE_FIELD *copy,
If the record is skipped the value of 'pos' is set to point to the position If the record is skipped the value of 'pos' is set to point to the position
right after the record. right after the record.
NOTE
Currently this function is called only when generating null complemented
records for outer joins (=> only when join_tab->first_unmatched != NULL).
RETURN VALUE RETURN VALUE
TRUE the match flag is set to MATCH_FOUND and the record has been skipped TRUE the match flag is set to MATCH_FOUND and the record has been skipped
FALSE otherwise FALSE otherwise
...@@ -1966,7 +2004,9 @@ bool JOIN_CACHE::skip_if_matched() ...@@ -1966,7 +2004,9 @@ bool JOIN_CACHE::skip_if_matched()
if (prev_cache) if (prev_cache)
offset+= prev_cache->get_size_of_rec_offset(); offset+= prev_cache->get_size_of_rec_offset();
/* Check whether the match flag is MATCH_FOUND */ /* Check whether the match flag is MATCH_FOUND */
if (get_match_flag_by_pos(pos+offset) == MATCH_FOUND) if (get_match_flag_by_pos_from_join_buffer(pos+offset,
join_tab->first_unmatched) ==
MATCH_FOUND)
{ {
pos+= size_of_rec_len + get_rec_length(pos); pos+= size_of_rec_len + get_rec_length(pos);
return TRUE; return TRUE;
...@@ -1983,13 +2023,23 @@ bool JOIN_CACHE::skip_if_matched() ...@@ -1983,13 +2023,23 @@ bool JOIN_CACHE::skip_if_matched()
DESCRIPTION DESCRIPTION
This default implementation of the virtual function skip_if_not_needed_match This default implementation of the virtual function skip_if_not_needed_match
skips the next record from the join buffer if its match flag is not skips the next record from the join when generating join extensions
MATCH_NOT_FOUND, and, either its value is MATCH_FOUND and join_tab is the for the records in the join buffer depending on the value of the match flag.
first inner table of an inner join, or, its value is MATCH_IMPOSSIBLE - In the case of a semi-nest the match flag may be in two states
and join_tab is the first inner table of an outer join. {MATCH_NOT_FOUND, MATCH_FOUND}. The record is skipped if the flag is set
to MATCH_FOUND.
- In the case of a outer join nest when not_exists optimization is applied
the match may be in three states {MATCH_NOT_FOUND, MATCH_IMPOSSIBLE,
MATCH_FOUND. The record is skipped if the flag is set to MATCH_FOUND or
to MATCH_IMPOSSIBLE.
If the record is skipped the value of 'pos' is set to point to the position If the record is skipped the value of 'pos' is set to point to the position
right after the record. right after the record.
NOTE
Currently the function is called only when generating non-null complemented
extensions for records in the join buffer.
RETURN VALUE RETURN VALUE
TRUE the record has to be skipped TRUE the record has to be skipped
FALSE otherwise FALSE otherwise
...@@ -2000,11 +2050,19 @@ bool JOIN_CACHE::skip_if_not_needed_match() ...@@ -2000,11 +2050,19 @@ bool JOIN_CACHE::skip_if_not_needed_match()
DBUG_ASSERT(with_length); DBUG_ASSERT(with_length);
enum Match_flag match_fl; enum Match_flag match_fl;
uint offset= size_of_rec_len; uint offset= size_of_rec_len;
bool skip= FALSE;
if (prev_cache) if (prev_cache)
offset+= prev_cache->get_size_of_rec_offset(); offset+= prev_cache->get_size_of_rec_offset();
if ((match_fl= get_match_flag_by_pos(pos+offset)) != MATCH_NOT_FOUND && if (!join_tab->check_only_first_match())
(join_tab->check_only_first_match() == (match_fl == MATCH_FOUND)) ) return FALSE;
match_fl= get_match_flag_by_pos(pos+offset);
skip= join_tab->first_sj_inner_tab ?
match_fl == MATCH_FOUND : // the case of semi-join
match_fl != MATCH_NOT_FOUND; // the case of outer-join
if (skip)
{ {
pos+= size_of_rec_len + get_rec_length(pos); pos+= size_of_rec_len + get_rec_length(pos);
return TRUE; return TRUE;
...@@ -2104,7 +2162,14 @@ enum_nested_loop_state JOIN_CACHE::join_records(bool skip_last) ...@@ -2104,7 +2162,14 @@ enum_nested_loop_state JOIN_CACHE::join_records(bool skip_last)
goto finish; goto finish;
} }
join_tab->not_null_compl= FALSE; join_tab->not_null_compl= FALSE;
/* Prepare for generation of null complementing extensions */ /*
Prepare for generation of null complementing extensions.
For all inner tables of the outer join operation for which
regular matches have been just found the field 'first_unmatched'
is set to point the the first inner table. After all null
complement rows are generated for this outer join this field
is set back to NULL.
*/
for (tab= join_tab->first_inner; tab <= join_tab->last_inner; tab++) for (tab= join_tab->first_inner; tab <= join_tab->last_inner; tab++)
tab->first_unmatched= join_tab->first_inner; tab->first_unmatched= join_tab->first_inner;
} }
...@@ -2221,7 +2286,10 @@ enum_nested_loop_state JOIN_CACHE::join_matching_records(bool skip_last) ...@@ -2221,7 +2286,10 @@ enum_nested_loop_state JOIN_CACHE::join_matching_records(bool skip_last)
int error; int error;
enum_nested_loop_state rc= NESTED_LOOP_OK; enum_nested_loop_state rc= NESTED_LOOP_OK;
join_tab->table->null_row= 0; join_tab->table->null_row= 0;
bool check_only_first_match= join_tab->check_only_first_match(); bool check_only_first_match=
join_tab->check_only_first_match() &&
(!join_tab->first_inner || // semi-join case
join_tab->first_inner == join_tab->first_unmatched); // outer join case
bool outer_join_first_inner= join_tab->is_first_inner_for_outer_join(); bool outer_join_first_inner= join_tab->is_first_inner_for_outer_join();
DBUG_ENTER("JOIN_CACHE::join_matching_records"); DBUG_ENTER("JOIN_CACHE::join_matching_records");
......
...@@ -207,6 +207,8 @@ class JOIN_CACHE :public Sql_alloc ...@@ -207,6 +207,8 @@ class JOIN_CACHE :public Sql_alloc
/* /*
This flag indicates that records written into the join buffer contain This flag indicates that records written into the join buffer contain
a match flag field. The flag must be set by the init method. a match flag field. The flag must be set by the init method.
Currently any implementation of the virtial init method calls
the function JOIN_CACHE::calc_record_fields() to set this flag.
*/ */
bool with_match_flag; bool with_match_flag;
/* /*
...@@ -646,6 +648,13 @@ class JOIN_CACHE :public Sql_alloc ...@@ -646,6 +648,13 @@ class JOIN_CACHE :public Sql_alloc
/* Shall return the value of the match flag for the positioned record */ /* Shall return the value of the match flag for the positioned record */
virtual enum Match_flag get_match_flag_by_pos(uchar *rec_ptr); virtual enum Match_flag get_match_flag_by_pos(uchar *rec_ptr);
/*
Shall return the value of the match flag for the positioned record
from the join buffer attached to the specified table
*/
virtual enum Match_flag
get_match_flag_by_pos_from_join_buffer(uchar *rec_ptr, JOIN_TAB *tab);
/* Shall return the position of the current record */ /* Shall return the position of the current record */
virtual uchar *get_curr_rec() { return curr_rec_pos; } virtual uchar *get_curr_rec() { return curr_rec_pos; }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment