Fix dense_rank returning minimum rank of 2 when using null columns.

The bug was caused by a weird behaviour in test_if_group_changed, not returning true when testing for the first time after initializing the Cached_item list.

Fix dense_rank returning minimum rank of 2 when using null columns.
The bug was caused by a weird behaviour in test_if_group_changed, not returning true when testing for the first time after initializing the Cached_item list.
419c9250 · Vicențiu Ciorbaru · c61bb139 · 419c9250 · 419c9250 · 419c9250
Commit 419c9250 authored Apr 10, 2016 by Vicențiu Ciorbaru
5 changed files
--- a/mysql-test/r/win.result
+++ b/mysql-test/r/win.result
@@ -105,50 +105,6 @@ pk	a	rank() over (order by a desc)
 10	4	1
 drop table t2;
 #
-# Try DENSE_RANK() function
-#
-create table t3 (
-pk int primary key,
-a int,
-b int
-);
-insert into t3 values
-( 1 , 0, 10),
-( 2 , 0, 10),
-( 3 , 1, 10),
-( 4 , 1, 10),
-( 8 , 2, 10),
-( 5 , 2, 20),
-( 6 , 2, 20),
-( 7 , 2, 20),
-( 9 , 4, 20),
-(10 , 4, 20);
-select pk, a, b, rank() over (order by a), dense_rank() over (order by a) from t3;
-pk	a	b	rank() over (order by a)	dense_rank() over (order by a)
-1	0	10	1	1
-2	0	10	1	1
-3	1	10	3	2
-4	1	10	3	2
-8	2	10	5	3
-5	2	20	5	3
-6	2	20	5	3
-7	2	20	5	3
-9	4	20	9	4
-10	4	20	9	4
-select pk, a, b, rank() over (partition by b order by a), dense_rank() over (partition by b order by a) from t3;
-pk	a	b	rank() over (partition by b order by a)	dense_rank() over (partition by b order by a)
-1	0	10	1	1
-2	0	10	1	1
-3	1	10	3	2
-4	1	10	3	2
-8	2	10	5	3
-5	2	20	1	1
-6	2	20	1	1
-7	2	20	1	1
-9	4	20	4	2
-10	4	20	4	2
-drop table t3;
-#
 # Try Aggregates as window functions. With frames.
 #
 create table t0 (a int);

--- a/mysql-test/r/win_rank.result
+++ b/mysql-test/r/win_rank.result
+#
+# Try DENSE_RANK() function
+#
+create table t1 (
+pk int primary key,
+a int,
+b int
+);
+insert into t1 values
+( 1 , 0, 10),
+( 2 , 0, 10),
+( 3 , 1, 10),
+( 4 , 1, 10),
+( 8 , 2, 10),
+( 5 , 2, 20),
+( 6 , 2, 20),
+( 7 , 2, 20),
+( 9 , 4, 20),
+(10 , 4, 20);
+select pk, a, b, rank() over (order by a) as rank,
+dense_rank() over (order by a) as dense_rank
+from t1;
+pk	a	b	rank	dense_rank
+1	0	10	1	1
+2	0	10	1	1
+3	1	10	3	2
+4	1	10	3	2
+8	2	10	5	3
+5	2	20	5	3
+6	2	20	5	3
+7	2	20	5	3
+9	4	20	9	4
+10	4	20	9	4
+select pk, a, b, rank() over (partition by b order by a) as rank,
+dense_rank() over (partition by b order by a) as dense_rank
+from t1;
+pk	a	b	rank	dense_rank
+1	0	10	1	1
+2	0	10	1	1
+3	1	10	3	2
+4	1	10	3	2
+8	2	10	5	3
+5	2	20	1	0
+6	2	20	1	0
+7	2	20	1	0
+9	4	20	4	1
+10	4	20	4	1
+drop table t1;
+#
+# Test with null values in the table.
+#
+create table t2 (s1 int, s2 char(5));
+insert into t2 values (1,'a');
+insert into t2 values (null,null);
+insert into t2 values (1,null);
+insert into t2 values (null,'a');
+insert into t2 values (null,'c');
+insert into t2 values (2,'b');
+insert into t2 values (-1,'');
+select *, rank() over (order by s1) as rank,
+dense_rank() over (order by s1) as dense_rank
+from t2;
+s1	s2	rank	dense_rank
+1	a	5	3
+NULL	NULL	1	1
+1	NULL	5	3
+NULL	a	1	1
+NULL	c	1	1
+2	b	7	4
+-1		4	2
+select *, rank() over (partition by s2 order by s1) as rank,
+dense_rank() over (partition by s2 order by s1) as dense_rank
+from t2;
+s1	s2	rank	dense_rank
+1	a	2	2
+NULL	NULL	1	1
+1	NULL	2	2
+NULL	a	1	1
+NULL	c	1	1
+2	b	1	1
+-1		1	1
+select *, rank() over (order by s2) as rank,
+dense_rank() over (order by s2) as dense_rank
+from t2;
+s1	s2	rank	dense_rank
+1	a	4	3
+NULL	NULL	1	1
+1	NULL	1	1
+NULL	a	4	3
+NULL	c	7	5
+2	b	6	4
+-1		3	2
+select *, rank() over (partition by s1 order by s2) as rank,
+dense_rank() over (partition by s1 order by s2) as dense_rank
+from t2;
+s1	s2	rank	dense_rank
+1	a	2	2
+NULL	NULL	1	1
+1	NULL	1	1
+NULL	a	2	2
+NULL	c	3	3
+2	b	1	1
+-1		1	1
+drop table t2;
--- a/mysql-test/t/win.test
+++ b/mysql-test/t/win.test
@@ -86,32 +86,6 @@ select pk, a, rank() over (order by a desc) from t2;

 drop table t2;

--echo #
--echo # Try DENSE_RANK() function
--echo #
-create table t3 (
-  pk int primary key,
-  a int,
-  b int
-);
-
-insert into t3 values
-( 1 , 0, 10),
-( 2 , 0, 10),
-( 3 , 1, 10),
-( 4 , 1, 10),
-( 8 , 2, 10),
-( 5 , 2, 20),
-( 6 , 2, 20),
-( 7 , 2, 20),
-( 9 , 4, 20),
-(10 , 4, 20);
-
-select pk, a, b, rank() over (order by a), dense_rank() over (order by a) from t3;
-select pk, a, b, rank() over (partition by b order by a), dense_rank() over (partition by b order by a) from t3;
-
-drop table t3;
-
 --echo #
 --echo # Try Aggregates as window functions. With frames.
 --echo #

--- a/mysql-test/t/win_rank.test
+++ b/mysql-test/t/win_rank.test
+--echo #
+--echo # Try DENSE_RANK() function
+--echo #
+
+create table t1 (
+  pk int primary key,
+  a int,
+  b int
+);
+
+insert into t1 values
+( 1 , 0, 10),
+( 2 , 0, 10),
+( 3 , 1, 10),
+( 4 , 1, 10),
+( 8 , 2, 10),
+( 5 , 2, 20),
+( 6 , 2, 20),
+( 7 , 2, 20),
+( 9 , 4, 20),
+(10 , 4, 20);
+
+select pk, a, b, rank() over (order by a) as rank,
+                 dense_rank() over (order by a) as dense_rank
+from t1;
+select pk, a, b, rank() over (partition by b order by a) as rank,
+                 dense_rank() over (partition by b order by a) as dense_rank
+from t1;
+
+drop table t1;
+
+--echo #
+--echo # Test with null values in the table.
+--echo #
+
+create table t2 (s1 int, s2 char(5));
+insert into t2 values (1,'a');
+insert into t2 values (null,null);
+insert into t2 values (1,null);
+insert into t2 values (null,'a');
+insert into t2 values (null,'c');
+insert into t2 values (2,'b');
+insert into t2 values (-1,'');
+
+select *, rank() over (order by s1) as rank,
+          dense_rank() over (order by s1) as dense_rank
+from t2;
+select *, rank() over (partition by s2 order by s1) as rank,
+          dense_rank() over (partition by s2 order by s1) as dense_rank
+from t2;
+select *, rank() over (order by s2) as rank,
+          dense_rank() over (order by s2) as dense_rank
+from t2;
+select *, rank() over (partition by s1 order by s2) as rank,
+          dense_rank() over (partition by s1 order by s2) as dense_rank
+from t2;
+
+drop table t2;
--- a/sql/item_windowfunc.h
+++ b/sql/item_windowfunc.h
@@ -13,6 +13,16 @@ int test_if_group_changed(List<Cached_item> &list);
 class Group_bound_tracker
 {
  List<Cached_item> group_fields;
+  /*
+    During the first check_if_next_group, the list of cached_items is not
+    initialized. The compare function will return that the items match if
+    the field's value is the same as the Cached_item's default value (0).
+    This flag makes sure that we always return true during the first check.
+
+    XXX This is better to be implemented within test_if_group_changed, but
+    since it is used in other parts of the codebase, we keep it here for now.
+  */
+   bool first_check;
 public:
  void init(THD *thd, SQL_I_List<ORDER> *list)
  {
@@ -21,6 +31,7 @@ class Group_bound_tracker
      Cached_item *tmp= new_Cached_item(thd, curr->item[0], TRUE);
      group_fields.push_back(tmp);
    }
+    first_check= true;
  }

  void cleanup()
@@ -31,15 +42,28 @@ class Group_bound_tracker
  /*
    Check if the current row is in a different group than the previous row
    this function was called for.
-    The new row's group becomes the current row's group.
+    XXX: Side-effect: The new row's group becomes the current row's group.
+
+    Returns true if there is a change between the current_group and the cached
+    value, or if it is the first check after a call to init.
  */
  bool check_if_next_group()
  {
-    if (test_if_group_changed(group_fields) > -1)
+    if (test_if_group_changed(group_fields) > -1 || first_check)
+    {
+      first_check= false;
      return true;
+    }
    return false;
  }

+  /*
+    Check if the current row is in a different group than the previous row
+    check_if_next_group was called for.
+
+    Compares the groups without the additional side effect of updating the
+    current cached values.
+  */
  int compare_with_cache()
  {
    List_iterator<Cached_item> li(group_fields);
@@ -196,7 +220,7 @@ class Item_sum_dense_rank: public Item_sum_int
  */
  void clear()
  {
-    dense_rank= 1;
+    dense_rank= 0;
  }
  bool add();
  void update_field() {}