From 11aad5cb1880b3fcfcd468bc5fb41b27484882f6 Mon Sep 17 00:00:00 2001
From: Sergey Petrunya <psergey@askmonty.org>
Date: Fri, 5 Aug 2011 22:01:49 +0400
Subject: [PATCH] Backport of: revno: 2876.47.174 revision-id:
 jorgen.loland@oracle.com-20110519120355-qn7eprkad9jqwu5j parent:
 mayank.prasad@oracle.com-20110518143645-bdxv4udzrmqsjmhq committer: Jorgen
 Loland <jorgen.loland@oracle.com> branch nick: mysql-trunk-11765831
 timestamp: Thu 2011-05-19 14:03:55 +0200 message:   BUG#11765831: 'RANGE
 ACCESS' MAY INCORRECTLY FILTER                         AWAY QUALIFYING ROWS

  The problem was that the ranges created when OR'ing two
  conditions could be incorrect. Without the bugfix,
  "I <> 6 OR (I <> 8 AND J = 5)" would create these ranges:

  "NULL < I < 6",
  "6 <= I <= 6 AND 5 <= J <= 5",
  "6 < I < 8",
  "8 <= I <= 8 AND 5 <= J <= 5",
  "8 < I"

  While the correct ranges is
  "NULL < I < 6",
  "6 <= I <= 6 AND 5 <= J <= 5",
  "6 < I"

  The problem occurs when key_or() ORs
  (1) "NULL < I < 6, 6 <= I <= 6 AND 5 <= J <= 5, 6 < I" with
  (2) "8 < I AND 5 <= J <= 5"

  The reason for the bug is that in key_or(), SEL_ARG *tmp is
  used to point to the range in (1) above that is merged with
  (2) while key1 points to the root of the red-black tree of
  (1). When merging (1) and (2), tmp refers to the "6 < I"
  part whereas the root is the "6 <= ... AND 5 <= J <= 5" part.

  key_or() decides that the tmp range needs to be split into
  "6 < I < 8, 8 <= I <= 8, 8 < I", in which next_key_part of the
  second range should be that of tmp. However, next_key_part is
  set to key1->next_key_part ("5 <= J <= 5") instead of
  tmp->next_key_part (empty). Fixing this gives the correct but
  not optimal ranges:
  "NULL < I < 6",
  "6 <= I <= 6 AND 5 <= J <= 5",
  "6 < I < 8",
  "8 <= I <= 8",
  "8 < I"

  A second problem can be seen above: key_or() may create
  adjacent ranges that could be replaced with a single range.
  Fixes for this is also included in the patch so that the range
  above becomes correct AND optimal:
  "NULL < I < 6",
  "6 <= I <= 6 AND 5 <= J <= 5",
  "6 < I"

  Merging adjacent ranges like this gives a slightly lower cost
  estimate for the range access.
---
 mysql-test/r/group_min_max.result             |  6 +-
 mysql-test/r/range.result                     | 48 +++++++++++++++-
 mysql-test/r/range_mrr_icp.result             | 48 +++++++++++++++-
 .../r/range_vs_index_merge_innodb.result      |  4 +-
 mysql-test/t/range.test                       | 48 +++++++++++++++-
 sql/opt_range.cc                              | 57 ++++++++++++++++++-
 6 files changed, 200 insertions(+), 11 deletions(-)

diff --git a/mysql-test/r/group_min_max.result b/mysql-test/r/group_min_max.result
index 7cd0011427e..6850d7c1993 100644
--- a/mysql-test/r/group_min_max.result
+++ b/mysql-test/r/group_min_max.result
@@ -876,10 +876,10 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t1	range	NULL	idx_t1_1	163	NULL	17	Using where; Using index for group-by
 explain select a1,a2,b,       max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	NULL	idx_t1_1	163	NULL	17	Using where; Using index for group-by
+1	SIMPLE	t1	range	NULL	idx_t1_1	147	NULL	17	Using where; Using index for group-by
 explain select a1,a2,b,min(c),max(c) from t1 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	NULL	idx_t1_1	163	NULL	17	Using where; Using index for group-by
+1	SIMPLE	t1	range	NULL	idx_t1_1	147	NULL	17	Using where; Using index for group-by
 explain select a1,a2,b,min(c),max(c) from t1 where (c > 'b111') and (c <= 'g112') group by a1,a2,b;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t1	range	NULL	idx_t1_1	163	NULL	17	Using where; Using index for group-by
@@ -924,7 +924,7 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t2	range	NULL	idx_t2_1	163	NULL	#	Using where; Using index for group-by
 explain select a1,a2,b,       max(c) from t2 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t2	range	NULL	idx_t2_1	163	NULL	#	Using where; Using index for group-by
+1	SIMPLE	t2	range	NULL	idx_t2_1	146	NULL	#	Using where; Using index for group-by
 explain select a1,a2,b,min(c),max(c) from t2 where (c > 'b1') or (c <= 'g1') group by a1,a2,b;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t2	range	NULL	idx_t2_1	163	NULL	#	Using where; Using index for group-by
diff --git a/mysql-test/r/range.result b/mysql-test/r/range.result
index b66e0fc3fcd..de93b0eb8c7 100644
--- a/mysql-test/r/range.result
+++ b/mysql-test/r/range.result
@@ -1,4 +1,4 @@
-drop table if exists t1, t2, t3;
+drop table if exists t1, t2, t3, t10, t100;
 CREATE TABLE t1 (
 event_date date DEFAULT '0000-00-00' NOT NULL,
 type int(11) DEFAULT '0' NOT NULL,
@@ -1763,3 +1763,49 @@ select  min(f1)  from t1 where f1 >= '2006-05-25 07:00:20' and f1 between '2003-
 min(f1)
 NULL
 drop table t1;
+#
+# BUG#11765831: 'RANGE ACCESS' MAY INCORRECTLY FILTER 
+#               AWAY QUALIFYING ROWS
+#
+CREATE TABLE t10(
+K INT NOT NULL AUTO_INCREMENT,
+I INT, J INT,
+PRIMARY KEY(K),
+KEY(I,J)
+);
+INSERT INTO t10(I,J) VALUES (6,1),(6,2),(6,3),(6,4),(6,5),
+(6,6),(6,7),(6,8),(6,9),(6,0);
+CREATE TABLE t100 LIKE t10;
+INSERT INTO t100(I,J) SELECT X.I, X.K+(10*Y.K) FROM t10 AS X,t10 AS Y;
+INSERT INTO t100(I,J) VALUES(8,26);
+
+EXPLAIN SELECT * FROM t100 WHERE I <> 6 OR (I <> 8 AND J = 5);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t100	range	I	I	10	NULL	4	Using where
+
+SELECT * FROM t100 WHERE I <> 6 OR (I <> 8 AND J = 5);
+K	I	J
+101	8	26
+DROP TABLE t10,t100;
+#
+# lp:817363: Wrong result with sort_union and multipart key in maria-5.3
+# 
+CREATE TABLE t1 (a int NOT NULL , b int, c int, d varchar(32), KEY (d,b), PRIMARY KEY (a)) ;
+INSERT INTO t1 VALUES (7,7,NULL,'e'),(8,1,0,'p'),(9,7,1,'s'),(10,1,1,'j'),(12,2,0,'c'),(13,0,0,'a'),(14,1,1,'q');
+SELECT c FROM t1                  WHERE d='q' OR d>='q' OR a > 97 OR (d IN ('j','s','i') AND b = 102);
+c
+1
+1
+SELECT c FROM t1 ignore index (d) WHERE d='q' OR d>='q' OR a > 97 OR (d IN ('j','s','i') AND b = 102);
+c
+1
+1
+SELECT * FROM t1 ignore index(d) WHERE d = 'q' OR d >= 'q' OR (d IN ( 'j' , 's' , 'i' ) AND ( b = 102 ));
+a	b	c	d
+9	7	1	s
+14	1	1	q
+SELECT * FROM t1 force index(d)  WHERE d = 'q' OR d >= 'q' OR (d IN ( 'j' , 's' , 'i' ) AND ( b = 102 ));
+a	b	c	d
+14	1	1	q
+9	7	1	s
+DROP TABLE t1;
diff --git a/mysql-test/r/range_mrr_icp.result b/mysql-test/r/range_mrr_icp.result
index f98b91027c9..fdd8c6ca7ba 100644
--- a/mysql-test/r/range_mrr_icp.result
+++ b/mysql-test/r/range_mrr_icp.result
@@ -1,6 +1,6 @@
 set @mrr_icp_extra_tmp=@@optimizer_switch;
 set optimizer_switch='mrr=on,mrr_sort_keys=on,index_condition_pushdown=on';
-drop table if exists t1, t2, t3;
+drop table if exists t1, t2, t3, t10, t100;
 CREATE TABLE t1 (
 event_date date DEFAULT '0000-00-00' NOT NULL,
 type int(11) DEFAULT '0' NOT NULL,
@@ -1765,4 +1765,50 @@ select  min(f1)  from t1 where f1 >= '2006-05-25 07:00:20' and f1 between '2003-
 min(f1)
 NULL
 drop table t1;
+#
+# BUG#11765831: 'RANGE ACCESS' MAY INCORRECTLY FILTER 
+#               AWAY QUALIFYING ROWS
+#
+CREATE TABLE t10(
+K INT NOT NULL AUTO_INCREMENT,
+I INT, J INT,
+PRIMARY KEY(K),
+KEY(I,J)
+);
+INSERT INTO t10(I,J) VALUES (6,1),(6,2),(6,3),(6,4),(6,5),
+(6,6),(6,7),(6,8),(6,9),(6,0);
+CREATE TABLE t100 LIKE t10;
+INSERT INTO t100(I,J) SELECT X.I, X.K+(10*Y.K) FROM t10 AS X,t10 AS Y;
+INSERT INTO t100(I,J) VALUES(8,26);
+
+EXPLAIN SELECT * FROM t100 WHERE I <> 6 OR (I <> 8 AND J = 5);
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t100	range	I	I	10	NULL	4	Using index condition; Rowid-ordered scan
+
+SELECT * FROM t100 WHERE I <> 6 OR (I <> 8 AND J = 5);
+K	I	J
+101	8	26
+DROP TABLE t10,t100;
+#
+# lp:817363: Wrong result with sort_union and multipart key in maria-5.3
+# 
+CREATE TABLE t1 (a int NOT NULL , b int, c int, d varchar(32), KEY (d,b), PRIMARY KEY (a)) ;
+INSERT INTO t1 VALUES (7,7,NULL,'e'),(8,1,0,'p'),(9,7,1,'s'),(10,1,1,'j'),(12,2,0,'c'),(13,0,0,'a'),(14,1,1,'q');
+SELECT c FROM t1                  WHERE d='q' OR d>='q' OR a > 97 OR (d IN ('j','s','i') AND b = 102);
+c
+1
+1
+SELECT c FROM t1 ignore index (d) WHERE d='q' OR d>='q' OR a > 97 OR (d IN ('j','s','i') AND b = 102);
+c
+1
+1
+SELECT * FROM t1 ignore index(d) WHERE d = 'q' OR d >= 'q' OR (d IN ( 'j' , 's' , 'i' ) AND ( b = 102 ));
+a	b	c	d
+9	7	1	s
+14	1	1	q
+SELECT * FROM t1 force index(d)  WHERE d = 'q' OR d >= 'q' OR (d IN ( 'j' , 's' , 'i' ) AND ( b = 102 ));
+a	b	c	d
+9	7	1	s
+14	1	1	q
+DROP TABLE t1;
 set optimizer_switch=@mrr_icp_extra_tmp;
diff --git a/mysql-test/r/range_vs_index_merge_innodb.result b/mysql-test/r/range_vs_index_merge_innodb.result
index 805b8e055ff..54151a7c2db 100644
--- a/mysql-test/r/range_vs_index_merge_innodb.result
+++ b/mysql-test/r/range_vs_index_merge_innodb.result
@@ -332,7 +332,7 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 EXPLAIN
 SELECT * FROM City WHERE (ID < 200) OR (ID BETWEEN 100 AND 200);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	City	range	PRIMARY	PRIMARY	4	NULL	199	Using where
+1	SIMPLE	City	range	PRIMARY	PRIMARY	4	NULL	200	Using where
 EXPLAIN
 SELECT * FROM City WHERE (ID < 600) OR (ID BETWEEN 900 AND 1500);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
@@ -369,7 +369,7 @@ WHERE ((ID < 200) AND (Name LIKE 'Ha%' OR (Country > 'A' AND Country < 'ARG')))
 OR ((ID BETWEEN 100 AND 200) AND 
 (Name LIKE 'Pa%' OR (Population > 103000 AND Population < 104000)));
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	City	range	PRIMARY,Population,Country,Name	PRIMARY	4	NULL	199	Using where
+1	SIMPLE	City	range	PRIMARY,Population,Country,Name	PRIMARY	4	NULL	200	Using where
 SELECT * FROM City USE INDEX ()
 WHERE ((ID < 10) AND (Name LIKE 'H%' OR (Country > 'A' AND Country < 'ARG')))
 OR ((ID BETWEEN 100 AND 110) AND 
diff --git a/mysql-test/t/range.test b/mysql-test/t/range.test
index 7206235e3e2..0a34bac32ba 100644
--- a/mysql-test/t/range.test
+++ b/mysql-test/t/range.test
@@ -3,7 +3,7 @@
 #
 
 --disable_warnings
-drop table if exists t1, t2, t3;
+drop table if exists t1, t2, t3, t10, t100;
 --enable_warnings
 
 CREATE TABLE t1 (
@@ -1402,3 +1402,49 @@ insert into t1 values ('2000-03-09 15:56:59'),('2000-05-05 23:24:28'),('2000-06-
 select  min(f1)  from t1 where f1 >= '2006-05-25 07:00:20' and f1 between '2003-11-23 10:00:09' and '2010-01-01 01:01:01' and f1 > '2001-01-01 01:01:01';
 drop table t1;
 
+--echo #
+--echo # BUG#11765831: 'RANGE ACCESS' MAY INCORRECTLY FILTER 
+--echo #               AWAY QUALIFYING ROWS
+--echo #
+
+CREATE TABLE t10(
+  K INT NOT NULL AUTO_INCREMENT,
+  I INT, J INT,
+  PRIMARY KEY(K),
+  KEY(I,J)
+);
+INSERT INTO t10(I,J) VALUES (6,1),(6,2),(6,3),(6,4),(6,5),
+                            (6,6),(6,7),(6,8),(6,9),(6,0);
+
+CREATE TABLE t100 LIKE t10;
+INSERT INTO t100(I,J) SELECT X.I, X.K+(10*Y.K) FROM t10 AS X,t10 AS Y;
+
+# Insert offending value:
+INSERT INTO t100(I,J) VALUES(8,26);
+
+let $query= SELECT * FROM t100 WHERE I <> 6 OR (I <> 8 AND J = 5);
+
+#Verify that 'range' access will be used
+--echo
+--eval EXPLAIN $query
+
+# Only row 101,8,26 should be returned
+--echo
+--eval $query
+
+DROP TABLE t10,t100;
+
+--echo #
+--echo # lp:817363: Wrong result with sort_union and multipart key in maria-5.3
+--echo # 
+CREATE TABLE t1 (a int NOT NULL , b int, c int, d varchar(32), KEY (d,b), PRIMARY KEY (a)) ;
+INSERT INTO t1 VALUES (7,7,NULL,'e'),(8,1,0,'p'),(9,7,1,'s'),(10,1,1,'j'),(12,2,0,'c'),(13,0,0,'a'),(14,1,1,'q');
+
+SELECT c FROM t1                  WHERE d='q' OR d>='q' OR a > 97 OR (d IN ('j','s','i') AND b = 102);
+SELECT c FROM t1 ignore index (d) WHERE d='q' OR d>='q' OR a > 97 OR (d IN ('j','s','i') AND b = 102);
+
+SELECT * FROM t1 ignore index(d) WHERE d = 'q' OR d >= 'q' OR (d IN ( 'j' , 's' , 'i' ) AND ( b = 102 ));
+SELECT * FROM t1 force index(d)  WHERE d = 'q' OR d >= 'q' OR (d IN ( 'j' , 's' , 'i' ) AND ( b = 102 ));
+
+DROP TABLE t1;
+
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index 390d18ebdb8..3ed975a59bb 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -9044,11 +9044,53 @@ key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2)
         This is the case ("cmp>=0" means that tmp.max >= key2.min):
         key2:              [----]
         tmp:     [------------*****]
+      */
+
+      if (!tmp->next_key_part)
+      {
+        /*
+          tmp->next_key_part is empty: cut the range that is covered
+          by tmp from key2. 
+          Reason: (key2->next_key_part OR tmp->next_key_part) will be
+          empty and therefore equal to tmp->next_key_part. Thus, this
+          part of the key2 range is completely covered by tmp.
+        */
+        if (tmp->cmp_max_to_max(key2) >= 0)
+        {
+          /*
+            tmp covers the entire range in key2. 
+            key2:              [----]
+            tmp:     [-----------------]
+
+            Move on to next range in key2
+          */
+          key2->increment_use_count(-1); // Free not used tree
+          key2=key2->next;
+          continue;
+        }
+        else
+        {
+          /*
+            This is the case:
+            key2:           [-------]
+            tmp:     [---------]
+
+            Result:
+            key2:               [---]
+            tmp:     [---------]
+          */
+          key2->copy_max_to_min(tmp);
+          continue;
+        }
+      }
 
+      /*
         The ranges are overlapping but have not been merged because
-        next_key_part of tmp and key2 are different
+        next_key_part of tmp and key2 differ. 
+        key2:              [----]
+        tmp:     [------------*****]
 
-        Result:
+        Split tmp in two where key2 starts:
         key2:              [----]
         key1:    [--------][--*****]
                  ^         ^
@@ -9057,7 +9099,7 @@ key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2)
       SEL_ARG *new_arg=tmp->clone_first(key2);
       if (!new_arg)
         return 0;                               // OOM
-      if ((new_arg->next_key_part= key1->next_key_part))
+      if ((new_arg->next_key_part= tmp->next_key_part))
         new_arg->increment_use_count(key1->use_count+1);
       tmp->copy_min_to_min(key2);
       key1=key1->insert(new_arg);
@@ -9166,12 +9208,21 @@ key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2)
                       ^        ^
                       new_arg  tmp
           Steps:
+           0) If tmp->next_key_part is empty: do nothing. Reason:
+              (key2_cpy->next_key_part OR tmp->next_key_part) will be
+              empty and therefore equal to tmp->next_key_part. Thus,
+              the range in key2_cpy is completely covered by tmp
            1) Make new_arg with range [tmp.min, key2_cpy.max].
               new_arg->next_key_part is OR between next_key_part
               of tmp and key2_cpy
            2) Make tmp the range [key2.max, tmp.max]
            3) Insert new_arg into key1
         */
+        if (!tmp->next_key_part) // Step 0
+        {
+          key2_cpy.increment_use_count(-1);     // Free not used tree
+          break;
+        }
         SEL_ARG *new_arg=tmp->clone_last(&key2_cpy);
         if (!new_arg)
           return 0; // OOM
-- 
2.30.9