Commit c4f5326b authored by Michael Widenius's avatar Michael Widenius

MDEV-6255 DUPLICATE KEY Errors on SELECT .. GROUP BY that uses temporary and filesort.

The problem was that my_hash_sort didn't properly remove end-space characters, so strings that should compare
as identical were seen as different strings.  (Space was handled correctly, but not NBSP)
This caused duplicate key errors when a heap table was converted to Aria as part of overflow in group by.

Fixed by removing all trailing characters that compare equal to space when creating a hash.

Other things:
- Fixed that --sorted_result also works for warnings and error messages in mysqltest.
- Speed up hash by not comparing strings that have different hashes.
- Speed up many my_hash_sort functions by using registers to calculate hash instead of pointers.
  This was previously done for some functions, but not for all.
- Made a macro of the hash function, to simplify code and to be able to experiment with new hash functions.







client/mysqltest.cc:
  Fixed that --sorted_result also works for error messages.
mysql-test/r/ctype_partitions.result:
  New test to ensure that partitions on hash work
mysql-test/suite/multi_source/gtid.result:
  Updated result
mysql-test/suite/multi_source/gtid.test:
  Test that --sorted_result works for error messages
mysql-test/suite/multi_source/gtid_ignore_duplicates.result:
  Updated result
mysql-test/suite/multi_source/gtid_ignore_duplicates.test:
  Updated result
mysql-test/suite/multi_source/load_data.result:
  Updated result
mysql-test/suite/multi_source/load_data.test:
  Updated result
mysql-test/t/ctype_partitions.test:
  New test to ensure that partitions on hash work
storage/heap/hp_write.c:
  Speed up hash by not comparing strings that have different hashes.
storage/maria/ma_check.c:
  Extra debug
strings/ctype-bin.c:
  Use macro for hash function
strings/ctype-latin1.c:
  Use macro for hash function
  Use registers to calculate hash (speedup)
strings/ctype-mb.c:
  Use macro for hash function
  Use registers to calculate hash (speedup)
strings/ctype-simple.c:
  Use macro for hash function
  Use same variable names as in other my_hash_sort functions.
  Update my_hash_sort_simple() to properly remove end space (patch by Bar)
strings/ctype-uca.c:
  Ignore end space in my_hash_sort_uca(): runs of spaces are collected and only hashed if a non-space character follows. This fixed MDEV-6255
  Use macro for hash function
  Use registers to calculate hash (speedup)
strings/ctype-ucs2.c:
  Use macro for hash function
  Use registers to calculate hash (speedup)
strings/ctype-utf8.c:
  Use macro for hash function
  Use registers to calculate hash (speedup)
strings/strings_def.h:
  Made a macro of the hash function, to simplify code and to be able to experiment with new hash functions.
parent 2362d984
...@@ -7719,6 +7719,7 @@ int append_warnings(DYNAMIC_STRING *ds, MYSQL* mysql) ...@@ -7719,6 +7719,7 @@ int append_warnings(DYNAMIC_STRING *ds, MYSQL* mysql)
{ {
uint count; uint count;
MYSQL_RES *warn_res; MYSQL_RES *warn_res;
DYNAMIC_STRING res;
DBUG_ENTER("append_warnings"); DBUG_ENTER("append_warnings");
if (!(count= mysql_warning_count(mysql))) if (!(count= mysql_warning_count(mysql)))
...@@ -7738,11 +7739,18 @@ int append_warnings(DYNAMIC_STRING *ds, MYSQL* mysql) ...@@ -7738,11 +7739,18 @@ int append_warnings(DYNAMIC_STRING *ds, MYSQL* mysql)
die("Warning count is %u but didn't get any warnings", die("Warning count is %u but didn't get any warnings",
count); count);
append_result(ds, warn_res); init_dynamic_string(&res, "", 1024, 1024);
append_result(&res, warn_res);
mysql_free_result(warn_res); mysql_free_result(warn_res);
DBUG_PRINT("warnings", ("%s", ds->str)); DBUG_PRINT("warnings", ("%s", res.str));
if (display_result_sorted)
dynstr_append_sorted(ds, &res, 0);
else
dynstr_append_mem(ds, res.str, res.length);
dynstr_free(&res);
DBUG_RETURN(count); DBUG_RETURN(count);
} }
......
#
# MDEV-6255 DUPLICATE KEY Errors on SELECT .. GROUP BY that uses temporary and filesort
#
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET cp1251 COLLATE cp1251_ukrainian_ci);
INSERT INTO t1 VALUES (0x20),(0x60),(0x6060),(0x606060);
SELECT HEX(a) FROM t1 WHERE a=0x60;
HEX(a)
20
60
6060
606060
ALTER TABLE t1 PARTITION BY KEY(a) PARTITIONS 3;
SELECT HEX(a) FROM t1 WHERE a=0x60;
HEX(a)
20
60
6060
606060
DROP TABLE t1;
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET koi8u COLLATE koi8u_general_ci);
INSERT INTO t1 VALUES (0x20),(0x60),(0x6060),(0x606060);
SELECT HEX(a) FROM t1 WHERE a=0x60;
HEX(a)
20
60
6060
606060
ALTER TABLE t1 PARTITION BY KEY(a) PARTITIONS 3;
SELECT HEX(a) FROM t1 WHERE a=0x60;
HEX(a)
20
60
6060
606060
DROP TABLE t1;
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET cp1250 COLLATE cp1250_general_ci);
INSERT INTO t1 VALUES (0x20),(0xA0),(0xA0A0),(0xA0A0A0);
SELECT HEX(a) FROM t1 WHERE a=0xA0;
HEX(a)
20
A0
A0A0
A0A0A0
ALTER TABLE t1 PARTITION BY KEY(a) PARTITIONS 3;
SELECT HEX(a) FROM t1 WHERE a=0xA0;
HEX(a)
20
A0
A0A0
A0A0A0
DROP TABLE t1;
...@@ -141,8 +141,8 @@ include/reset_master_slave.inc ...@@ -141,8 +141,8 @@ include/reset_master_slave.inc
SET GLOBAL gtid_domain_id=0; SET GLOBAL gtid_domain_id=0;
STOP ALL SLAVES; STOP ALL SLAVES;
Warnings: Warnings:
Note 1938 SLAVE 'slave2' stopped
Note 1938 SLAVE 'slave1' stopped Note 1938 SLAVE 'slave1' stopped
Note 1938 SLAVE 'slave2' stopped
include/reset_master_slave.inc include/reset_master_slave.inc
SET GLOBAL gtid_domain_id=0; SET GLOBAL gtid_domain_id=0;
include/reset_master_slave.inc include/reset_master_slave.inc
......
...@@ -141,12 +141,14 @@ DROP TABLE t3; ...@@ -141,12 +141,14 @@ DROP TABLE t3;
SET GLOBAL gtid_domain_id=0; SET GLOBAL gtid_domain_id=0;
--let $wait_condition= SELECT COUNT(*)=0 FROM information_schema.tables WHERE table_name IN ("t1", "t2", "t3") AND table_schema = "test" --let $wait_condition= SELECT COUNT(*)=0 FROM information_schema.tables WHERE table_name IN ("t1", "t2", "t3") AND table_schema = "test"
--source include/wait_condition.inc --source include/wait_condition.inc
--sorted_result
STOP ALL SLAVES; STOP ALL SLAVES;
--source reset_master_slave.inc --source reset_master_slave.inc
--disconnect slave1 --disconnect slave1
--connection slave2 --connection slave2
SET GLOBAL gtid_domain_id=0; SET GLOBAL gtid_domain_id=0;
--sorted_result
STOP ALL SLAVES; STOP ALL SLAVES;
--source reset_master_slave.inc --source reset_master_slave.inc
--disconnect slave2 --disconnect slave2
......
...@@ -245,8 +245,8 @@ a ...@@ -245,8 +245,8 @@ a
SET GLOBAL gtid_domain_id=0; SET GLOBAL gtid_domain_id=0;
STOP ALL SLAVES; STOP ALL SLAVES;
Warnings: Warnings:
Note 1938 SLAVE 'c2a' stopped
Note 1938 SLAVE 'b2a' stopped Note 1938 SLAVE 'b2a' stopped
Note 1938 SLAVE 'c2a' stopped
SET GLOBAL slave_parallel_threads= @old_parallel; SET GLOBAL slave_parallel_threads= @old_parallel;
SET GLOBAL gtid_ignore_duplicates= @old_ignore_duplicates; SET GLOBAL gtid_ignore_duplicates= @old_ignore_duplicates;
SET GLOBAL gtid_domain_id=0; SET GLOBAL gtid_domain_id=0;
......
...@@ -261,24 +261,28 @@ SELECT * FROM t1 WHERE a >= 20 ORDER BY a; ...@@ -261,24 +261,28 @@ SELECT * FROM t1 WHERE a >= 20 ORDER BY a;
# Clean up. # Clean up.
--connection server_1 --connection server_1
SET GLOBAL gtid_domain_id=0; SET GLOBAL gtid_domain_id=0;
--sorted_result
STOP ALL SLAVES; STOP ALL SLAVES;
SET GLOBAL slave_parallel_threads= @old_parallel; SET GLOBAL slave_parallel_threads= @old_parallel;
SET GLOBAL gtid_ignore_duplicates= @old_ignore_duplicates; SET GLOBAL gtid_ignore_duplicates= @old_ignore_duplicates;
--connection server_2 --connection server_2
SET GLOBAL gtid_domain_id=0; SET GLOBAL gtid_domain_id=0;
--sorted_result
STOP ALL SLAVES; STOP ALL SLAVES;
SET GLOBAL slave_parallel_threads= @old_parallel; SET GLOBAL slave_parallel_threads= @old_parallel;
SET GLOBAL gtid_ignore_duplicates= @old_ignore_duplicates; SET GLOBAL gtid_ignore_duplicates= @old_ignore_duplicates;
--connection server_3 --connection server_3
SET GLOBAL gtid_domain_id=0; SET GLOBAL gtid_domain_id=0;
--sorted_result
STOP ALL SLAVES; STOP ALL SLAVES;
SET GLOBAL slave_parallel_threads= @old_parallel; SET GLOBAL slave_parallel_threads= @old_parallel;
SET GLOBAL gtid_ignore_duplicates= @old_ignore_duplicates; SET GLOBAL gtid_ignore_duplicates= @old_ignore_duplicates;
--connection server_4 --connection server_4
SET GLOBAL gtid_domain_id=0; SET GLOBAL gtid_domain_id=0;
--sorted_result
STOP ALL SLAVES; STOP ALL SLAVES;
SET GLOBAL slave_parallel_threads= @old_parallel; SET GLOBAL slave_parallel_threads= @old_parallel;
SET GLOBAL gtid_ignore_duplicates= @old_ignore_duplicates; SET GLOBAL gtid_ignore_duplicates= @old_ignore_duplicates;
......
...@@ -23,8 +23,8 @@ drop table t1; ...@@ -23,8 +23,8 @@ drop table t1;
drop table t2; drop table t2;
stop all slaves; stop all slaves;
Warnings: Warnings:
Note 1938 SLAVE 'master2' stopped
Note 1938 SLAVE '' stopped Note 1938 SLAVE '' stopped
Note 1938 SLAVE 'master2' stopped
include/reset_master_slave.inc include/reset_master_slave.inc
include/reset_master_slave.inc include/reset_master_slave.inc
include/reset_master_slave.inc include/reset_master_slave.inc
...@@ -58,6 +58,7 @@ drop table t2; ...@@ -58,6 +58,7 @@ drop table t2;
--sync_with_master 0,'master2' --sync_with_master 0,'master2'
--connection slave --connection slave
--sorted_result
stop all slaves; stop all slaves;
--source reset_master_slave.inc --source reset_master_slave.inc
......
--source include/have_partition.inc
--echo #
--echo # MDEV-6255 DUPLICATE KEY Errors on SELECT .. GROUP BY that uses temporary and filesort
--echo #
# cp1251_ukrainian_ci: 0x20 SPACE is equal to 0x60 GRAVE ACCENT
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET cp1251 COLLATE cp1251_ukrainian_ci);
INSERT INTO t1 VALUES (0x20),(0x60),(0x6060),(0x606060);
SELECT HEX(a) FROM t1 WHERE a=0x60;
ALTER TABLE t1 PARTITION BY KEY(a) PARTITIONS 3;
SELECT HEX(a) FROM t1 WHERE a=0x60;
DROP TABLE t1;
# koi8u_general_ci: 0x20 SPACE is equal to 0x60 GRAVE ACCENT
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET koi8u COLLATE koi8u_general_ci);
INSERT INTO t1 VALUES (0x20),(0x60),(0x6060),(0x606060);
SELECT HEX(a) FROM t1 WHERE a=0x60;
ALTER TABLE t1 PARTITION BY KEY(a) PARTITIONS 3;
SELECT HEX(a) FROM t1 WHERE a=0x60;
DROP TABLE t1;
# cp1250_general_ci: 0x20 SPACE is equal to 0xA0 NO-BREAK SPACE
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET cp1250 COLLATE cp1250_general_ci);
INSERT INTO t1 VALUES (0x20),(0xA0),(0xA0A0),(0xA0A0A0);
SELECT HEX(a) FROM t1 WHERE a=0xA0;
ALTER TABLE t1 PARTITION BY KEY(a) PARTITIONS 3;
SELECT HEX(a) FROM t1 WHERE a=0xA0;
DROP TABLE t1;
...@@ -385,7 +385,8 @@ int hp_write_key(HP_INFO *info, HP_KEYDEF *keyinfo, ...@@ -385,7 +385,8 @@ int hp_write_key(HP_INFO *info, HP_KEYDEF *keyinfo,
pos=empty; pos=empty;
do do
{ {
if (! hp_rec_key_cmp(keyinfo, record, pos->ptr_to_rec, 1)) if (pos->hash_of_key == hash_of_key &&
! hp_rec_key_cmp(keyinfo, record, pos->ptr_to_rec, 1))
{ {
DBUG_RETURN(my_errno=HA_ERR_FOUND_DUPP_KEY); DBUG_RETURN(my_errno=HA_ERR_FOUND_DUPP_KEY);
} }
......
...@@ -5515,6 +5515,8 @@ static int sort_key_write(MARIA_SORT_PARAM *sort_param, const uchar *a) ...@@ -5515,6 +5515,8 @@ static int sort_key_write(MARIA_SORT_PARAM *sort_param, const uchar *a)
} }
if ((sort_param->keyinfo->flag & HA_NOSAME) && cmp == 0) if ((sort_param->keyinfo->flag & HA_NOSAME) && cmp == 0)
{ {
DBUG_EXECUTE("key", _ma_print_keydata(DBUG_FILE, sort_param->seg, a,
USE_WHOLE_KEY););
sort_info->dupp++; sort_info->dupp++;
sort_info->info->cur_row.lastpos= get_record_for_key(sort_param->keyinfo, sort_info->info->cur_row.lastpos= get_record_for_key(sort_param->keyinfo,
a); a);
......
...@@ -288,9 +288,7 @@ void my_hash_sort_8bit_bin(CHARSET_INFO *cs __attribute__((unused)), ...@@ -288,9 +288,7 @@ void my_hash_sort_8bit_bin(CHARSET_INFO *cs __attribute__((unused)),
for (; key < end ; key++) for (; key < end ; key++)
{ {
tmp1^= (ulong) ((((uint) tmp1 & 63) + tmp2) * MY_HASH_ADD(tmp1, tmp2, (uint) *key);
((uint) *key)) + (tmp1 << 8);
tmp2+= 3;
} }
*nr1= tmp1; *nr1= tmp1;
...@@ -307,9 +305,7 @@ void my_hash_sort_bin(CHARSET_INFO *cs __attribute__((unused)), ...@@ -307,9 +305,7 @@ void my_hash_sort_bin(CHARSET_INFO *cs __attribute__((unused)),
for (; key < end ; key++) for (; key < end ; key++)
{ {
tmp1^= (ulong) ((((uint) tmp1 & 63) + tmp2) * MY_HASH_ADD(tmp1, tmp2, (uint) *key);
((uint) *key)) + (tmp1 << 8);
tmp2+= 3;
} }
*nr1= tmp1; *nr1= tmp1;
......
...@@ -691,6 +691,8 @@ void my_hash_sort_latin1_de(CHARSET_INFO *cs __attribute__((unused)), ...@@ -691,6 +691,8 @@ void my_hash_sort_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
ulong *nr1, ulong *nr2) ulong *nr1, ulong *nr2)
{ {
const uchar *end; const uchar *end;
register ulong m1= *nr1, m2= *nr2;
/* /*
Remove end space. We have to do this to be able to compare Remove end space. We have to do this to be able to compare
'AE' and '' as identical 'AE' and '' as identical
...@@ -700,14 +702,14 @@ void my_hash_sort_latin1_de(CHARSET_INFO *cs __attribute__((unused)), ...@@ -700,14 +702,14 @@ void my_hash_sort_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
for (; key < end ; key++) for (; key < end ; key++)
{ {
uint X= (uint) combo1map[(uint) *key]; uint X= (uint) combo1map[(uint) *key];
nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * X) + (nr1[0] << 8); MY_HASH_ADD(m1, m2, X);
nr2[0]+=3;
if ((X= combo2map[*key])) if ((X= combo2map[*key]))
{ {
nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * X) + (nr1[0] << 8); MY_HASH_ADD(m1, m2, X);
nr2[0]+=3;
} }
} }
*nr1= m1;
*nr2= m2;
} }
......
...@@ -680,6 +680,8 @@ void ...@@ -680,6 +680,8 @@ void
my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)), my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
const uchar *key, size_t len,ulong *nr1, ulong *nr2) const uchar *key, size_t len,ulong *nr1, ulong *nr2)
{ {
register ulong m1= *nr1, m2= *nr2;
/* /*
Remove trailing spaces. We have to do this to be able to compare Remove trailing spaces. We have to do this to be able to compare
'A ' and 'A' as identical 'A ' and 'A' as identical
...@@ -688,10 +690,10 @@ my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)), ...@@ -688,10 +690,10 @@ my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
for (; key < end ; key++) for (; key < end ; key++)
{ {
nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * MY_HASH_ADD(m1, m2, (uint)*key);
((uint)*key)) + (nr1[0] << 8);
nr2[0]+=3;
} }
*nr1= m1;
*nr2= m2;
} }
......
...@@ -306,24 +306,48 @@ void my_hash_sort_simple(CHARSET_INFO *cs, ...@@ -306,24 +306,48 @@ void my_hash_sort_simple(CHARSET_INFO *cs,
{ {
register const uchar *sort_order=cs->sort_order; register const uchar *sort_order=cs->sort_order;
const uchar *end; const uchar *end;
ulong n1, n2; register ulong m1= *nr1, m2= *nr2;
uint16 space_weight= sort_order[' '];
/* /*
Remove end space. We have to do this to be able to compare Remove all trailing characters that are equal to space.
'A ' and 'A' as identical We have to do this to be able to compare 'A ' and 'A' as identical.
If the key is long enough, cut the trailing spaces (0x20) using an
optimized function implemented in skip_trailing_spaces().
"len > 16" is just some heuristic here.
Calling skip_trailing_space() for short values is not desirable,
because its initialization block may be more expensive than the
performance gained.
*/
end= len > 16 ? skip_trailing_space(key, len) : key + len;
/*
We removed all trailing characters that are binary equal to space 0x20.
Now remove all trailing characters that have weights equal to space.
Some 8bit simple collations may have such characters:
- cp1250_general_ci 0xA0 NO-BREAK SPACE == 0x20 SPACE
- cp1251_ukrainian_ci 0x60 GRAVE ACCENT == 0x20 SPACE
- koi8u_general_ci 0x60 GRAVE ACCENT == 0x20 SPACE
*/ */
end= skip_trailing_space(key, len);
n1= *nr1; for ( ; key < end ; )
n2= *nr2; {
if (sort_order[*--end] != space_weight)
{
end++;
break;
}
}
for (; key < (uchar*) end ; key++) for (; key < (uchar*) end ; key++)
{ {
n1^=(ulong) ((((uint) n1 & 63)+n2) * MY_HASH_ADD(m1, m2, (uint) sort_order[(uint) *key]);
((uint) sort_order[(uint) *key])) + (n1 << 8);
n2+=3;
} }
*nr1= n1; *nr1= m1;
*nr2= n2; *nr2= m2;
} }
......
...@@ -20873,21 +20873,45 @@ static int my_strnncollsp_uca(CHARSET_INFO *cs, ...@@ -20873,21 +20873,45 @@ static int my_strnncollsp_uca(CHARSET_INFO *cs,
static void my_hash_sort_uca(CHARSET_INFO *cs, static void my_hash_sort_uca(CHARSET_INFO *cs,
my_uca_scanner_handler *scanner_handler, my_uca_scanner_handler *scanner_handler,
const uchar *s, size_t slen, const uchar *s, size_t slen,
ulong *n1, ulong *n2) ulong *nr1, ulong *nr2)
{ {
int s_res; int s_res;
my_uca_scanner scanner; my_uca_scanner scanner;
int space_weight= my_space_weight(cs);
slen= cs->cset->lengthsp(cs, (char*) s, slen); register ulong m1= *nr1, m2= *nr2;
scanner_handler->init(&scanner, cs, &cs->uca->level[0], s, slen); scanner_handler->init(&scanner, cs, &cs->uca->level[0], s, slen);
while ((s_res= scanner_handler->next(&scanner)) >0) while ((s_res= scanner_handler->next(&scanner)) >0)
{ {
n1[0]^= (((n1[0] & 63)+n2[0])*(s_res >> 8))+ (n1[0] << 8); if (s_res == space_weight)
n2[0]+=3; {
n1[0]^= (((n1[0] & 63)+n2[0])*(s_res & 0xFF))+ (n1[0] << 8); /* Combine all spaces to be able to skip end spaces */
n2[0]+=3; uint count= 0;
do
{
count++;
if ((s_res= scanner_handler->next(&scanner)) <= 0)
{
/* Skip spaces at end of string */
goto end;
}
}
while (s_res == space_weight);
/* Add back the hash for the space characters */
do
{
MY_HASH_ADD_16(m1, m2, space_weight);
}
while (--count != 0);
}
MY_HASH_ADD_16(m1, m2, s_res);
} }
end:
*nr1= m1;
*nr2= m2;
} }
...@@ -1222,23 +1222,23 @@ my_caseup_utf16(CHARSET_INFO *cs, char *src, size_t srclen, ...@@ -1222,23 +1222,23 @@ my_caseup_utf16(CHARSET_INFO *cs, char *src, size_t srclen,
static void static void
my_hash_sort_utf16(CHARSET_INFO *cs, const uchar *s, size_t slen, my_hash_sort_utf16(CHARSET_INFO *cs, const uchar *s, size_t slen,
ulong *n1, ulong *n2) ulong *nr1, ulong *nr2)
{ {
my_wc_t wc; my_wc_t wc;
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc; my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
int res; int res;
const uchar *e= s + cs->cset->lengthsp(cs, (const char *) s, slen); const uchar *e= s + cs->cset->lengthsp(cs, (const char *) s, slen);
MY_UNICASE_INFO *uni_plane= cs->caseinfo; MY_UNICASE_INFO *uni_plane= cs->caseinfo;
register ulong m1= *nr1, m2= *nr2;
while ((s < e) && (res= mb_wc(cs, &wc, (uchar *) s, (uchar *) e)) > 0) while ((s < e) && (res= mb_wc(cs, &wc, (uchar *) s, (uchar *) e)) > 0)
{ {
my_tosort_utf16(uni_plane, &wc); my_tosort_utf16(uni_plane, &wc);
n1[0]^= (((n1[0] & 63) + n2[0]) * (wc & 0xFF)) + (n1[0] << 8); MY_HASH_ADD_16(m1, m2, wc);
n2[0]+= 3;
n1[0]^= (((n1[0] & 63) + n2[0]) * (wc >> 8)) + (n1[0] << 8);
n2[0]+= 3;
s+= res; s+= res;
} }
*nr1= m1;
*nr2= m2;
} }
...@@ -1611,12 +1611,14 @@ my_hash_sort_utf16_bin(CHARSET_INFO *cs, ...@@ -1611,12 +1611,14 @@ my_hash_sort_utf16_bin(CHARSET_INFO *cs,
const uchar *pos, size_t len, ulong *nr1, ulong *nr2) const uchar *pos, size_t len, ulong *nr1, ulong *nr2)
{ {
const uchar *end= pos + cs->cset->lengthsp(cs, (const char *) pos, len); const uchar *end= pos + cs->cset->lengthsp(cs, (const char *) pos, len);
register ulong m1= *nr1, m2= *nr2;
for ( ; pos < end ; pos++) for ( ; pos < end ; pos++)
{ {
nr1[0]^= (ulong) ((((uint) nr1[0] & 63) + nr2[0]) * MY_HASH_ADD(m1, m2, (uint)*pos);
((uint)*pos)) + (nr1[0] << 8);
nr2[0]+= 3;
} }
*nr1= m1;
*nr2= m2;
} }
...@@ -2007,22 +2009,15 @@ my_caseup_utf32(CHARSET_INFO *cs, char *src, size_t srclen, ...@@ -2007,22 +2009,15 @@ my_caseup_utf32(CHARSET_INFO *cs, char *src, size_t srclen,
} }
static inline void
my_hash_add(ulong *n1, ulong *n2, uint ch)
{
n1[0]^= (((n1[0] & 63) + n2[0]) * (ch)) + (n1[0] << 8);
n2[0]+= 3;
}
static void static void
my_hash_sort_utf32(CHARSET_INFO *cs, const uchar *s, size_t slen, my_hash_sort_utf32(CHARSET_INFO *cs, const uchar *s, size_t slen,
ulong *n1, ulong *n2) ulong *nr1, ulong *nr2)
{ {
my_wc_t wc; my_wc_t wc;
int res; int res;
const uchar *e= s + slen; const uchar *e= s + slen;
MY_UNICASE_INFO *uni_plane= cs->caseinfo; MY_UNICASE_INFO *uni_plane= cs->caseinfo;
register ulong m1= *nr1, m2= *nr2;
/* Skip trailing spaces */ /* Skip trailing spaces */
while (e > s + 3 && e[-1] == ' ' && !e[-2] && !e[-3] && !e[-4]) while (e > s + 3 && e[-1] == ' ' && !e[-2] && !e[-3] && !e[-4])
...@@ -2031,12 +2026,14 @@ my_hash_sort_utf32(CHARSET_INFO *cs, const uchar *s, size_t slen, ...@@ -2031,12 +2026,14 @@ my_hash_sort_utf32(CHARSET_INFO *cs, const uchar *s, size_t slen,
while ((res= my_utf32_uni(cs, &wc, (uchar*) s, (uchar*) e)) > 0) while ((res= my_utf32_uni(cs, &wc, (uchar*) s, (uchar*) e)) > 0)
{ {
my_tosort_utf32(uni_plane, &wc); my_tosort_utf32(uni_plane, &wc);
my_hash_add(n1, n2, (uint) (wc >> 24)); MY_HASH_ADD(m1, m2, (uint) (wc >> 24));
my_hash_add(n1, n2, (uint) (wc >> 16) & 0xFF); MY_HASH_ADD(m1, m2, (uint) (wc >> 16) & 0xFF);
my_hash_add(n1, n2, (uint) (wc >> 8) & 0xFF); MY_HASH_ADD(m1, m2, (uint) (wc >> 8) & 0xFF);
my_hash_add(n1, n2, (uint) (wc & 0xFF)); MY_HASH_ADD(m1, m2, (uint) (wc & 0xFF));
s+= res; s+= res;
} }
*nr1= m1;
*nr2= m2;
} }
...@@ -2976,12 +2973,13 @@ static size_t my_caseup_ucs2(CHARSET_INFO *cs, char *src, size_t srclen, ...@@ -2976,12 +2973,13 @@ static size_t my_caseup_ucs2(CHARSET_INFO *cs, char *src, size_t srclen,
static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, size_t slen, static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, size_t slen,
ulong *n1, ulong *n2) ulong *nr1, ulong *nr2)
{ {
my_wc_t wc; my_wc_t wc;
int res; int res;
const uchar *e=s+slen; const uchar *e=s+slen;
MY_UNICASE_INFO *uni_plane= cs->caseinfo; MY_UNICASE_INFO *uni_plane= cs->caseinfo;
register ulong m1= *nr1, m2= *nr2;
while (e > s+1 && e[-1] == ' ' && e[-2] == '\0') while (e > s+1 && e[-1] == ' ' && e[-2] == '\0')
e-= 2; e-= 2;
...@@ -2989,12 +2987,11 @@ static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, size_t slen, ...@@ -2989,12 +2987,11 @@ static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, size_t slen,
while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e)) >0) while ((s < e) && (res=my_ucs2_uni(cs,&wc, (uchar *)s, (uchar*)e)) >0)
{ {
my_tosort_ucs2(uni_plane, &wc); my_tosort_ucs2(uni_plane, &wc);
n1[0]^= (((n1[0] & 63)+n2[0])*(wc & 0xFF))+ (n1[0] << 8); MY_HASH_ADD_16(m1, m2, wc);
n2[0]+=3;
n1[0]^= (((n1[0] & 63)+n2[0])*(wc >> 8))+ (n1[0] << 8);
n2[0]+=3;
s+=res; s+=res;
} }
*nr1= m1;
*nr2= m2;
} }
...@@ -3312,16 +3309,17 @@ void my_hash_sort_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)), ...@@ -3312,16 +3309,17 @@ void my_hash_sort_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)),
const uchar *key, size_t len,ulong *nr1, ulong *nr2) const uchar *key, size_t len,ulong *nr1, ulong *nr2)
{ {
const uchar *end = key + len; const uchar *end = key + len;
register ulong m1= *nr1, m2= *nr2;
while (end > key+1 && end[-1] == ' ' && end[-2] == '\0') while (end > key+1 && end[-1] == ' ' && end[-2] == '\0')
end-= 2; end-= 2;
for (; key < (uchar*) end ; key++) for (; key < (uchar*) end ; key++)
{ {
nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * MY_HASH_ADD(m1, m2, (uint)*key);
((uint)*key)) + (nr1[0] << 8);
nr2[0]+=3;
} }
*nr1= m1;
*nr2= m2;
} }
......
...@@ -5087,12 +5087,13 @@ static size_t my_caseup_utf8(CHARSET_INFO *cs, char *src, size_t srclen, ...@@ -5087,12 +5087,13 @@ static size_t my_caseup_utf8(CHARSET_INFO *cs, char *src, size_t srclen,
static void my_hash_sort_utf8(CHARSET_INFO *cs, const uchar *s, size_t slen, static void my_hash_sort_utf8(CHARSET_INFO *cs, const uchar *s, size_t slen,
ulong *n1, ulong *n2) ulong *nr1, ulong *nr2)
{ {
my_wc_t wc; my_wc_t wc;
int res; int res;
const uchar *e=s+slen; const uchar *e=s+slen;
MY_UNICASE_INFO *uni_plane= cs->caseinfo; MY_UNICASE_INFO *uni_plane= cs->caseinfo;
register ulong m1= *nr1, m2= *nr2;
/* /*
Remove end space. We have to do this to be able to compare Remove end space. We have to do this to be able to compare
...@@ -5104,12 +5105,11 @@ static void my_hash_sort_utf8(CHARSET_INFO *cs, const uchar *s, size_t slen, ...@@ -5104,12 +5105,11 @@ static void my_hash_sort_utf8(CHARSET_INFO *cs, const uchar *s, size_t slen,
while ((s < e) && (res=my_utf8_uni(cs,&wc, (uchar *)s, (uchar*)e))>0 ) while ((s < e) && (res=my_utf8_uni(cs,&wc, (uchar *)s, (uchar*)e))>0 )
{ {
my_tosort_unicode(uni_plane, &wc, cs->state); my_tosort_unicode(uni_plane, &wc, cs->state);
n1[0]^= (((n1[0] & 63)+n2[0])*(wc & 0xFF))+ (n1[0] << 8); MY_HASH_ADD_16(m1, m2, wc);
n2[0]+=3;
n1[0]^= (((n1[0] & 63)+n2[0])*(wc >> 8))+ (n1[0] << 8);
n2[0]+=3;
s+=res; s+=res;
} }
*nr1= m1;
*nr2= m2;
} }
...@@ -7597,22 +7597,15 @@ my_caseup_utf8mb4(CHARSET_INFO *cs, char *src, size_t srclen, ...@@ -7597,22 +7597,15 @@ my_caseup_utf8mb4(CHARSET_INFO *cs, char *src, size_t srclen,
} }
static inline void
my_hash_add(ulong *n1, ulong *n2, uint ch)
{
n1[0]^= (((n1[0] & 63) + n2[0]) * (ch)) + (n1[0] << 8);
n2[0]+= 3;
}
static void static void
my_hash_sort_utf8mb4(CHARSET_INFO *cs, const uchar *s, size_t slen, my_hash_sort_utf8mb4(CHARSET_INFO *cs, const uchar *s, size_t slen,
ulong *n1, ulong *n2) ulong *nr1, ulong *nr2)
{ {
my_wc_t wc; my_wc_t wc;
int res; int res;
const uchar *e= s + slen; const uchar *e= s + slen;
MY_UNICASE_INFO *uni_plane= cs->caseinfo; MY_UNICASE_INFO *uni_plane= cs->caseinfo;
register ulong m1= *nr1, m2= *nr2;
/* /*
Remove end space. We do this to be able to compare Remove end space. We do this to be able to compare
...@@ -7624,8 +7617,7 @@ my_hash_sort_utf8mb4(CHARSET_INFO *cs, const uchar *s, size_t slen, ...@@ -7624,8 +7617,7 @@ my_hash_sort_utf8mb4(CHARSET_INFO *cs, const uchar *s, size_t slen,
while ((res= my_mb_wc_utf8mb4(cs, &wc, (uchar*) s, (uchar*) e)) > 0) while ((res= my_mb_wc_utf8mb4(cs, &wc, (uchar*) s, (uchar*) e)) > 0)
{ {
my_tosort_unicode(uni_plane, &wc, cs->state); my_tosort_unicode(uni_plane, &wc, cs->state);
my_hash_add(n1, n2, (uint) (wc & 0xFF)); MY_HASH_ADD_16(m1, m2, (uint) (wc & 0xFFFF));
my_hash_add(n1, n2, (uint) (wc >> 8) & 0xFF);
if (wc > 0xFFFF) if (wc > 0xFFFF)
{ {
/* /*
...@@ -7635,10 +7627,12 @@ my_hash_sort_utf8mb4(CHARSET_INFO *cs, const uchar *s, size_t slen, ...@@ -7635,10 +7627,12 @@ my_hash_sort_utf8mb4(CHARSET_INFO *cs, const uchar *s, size_t slen,
This is useful to keep order of records in This is useful to keep order of records in
test results, e.g. for "SHOW GRANTS". test results, e.g. for "SHOW GRANTS".
*/ */
my_hash_add(n1, n2, (uint) (wc >> 16) & 0xFF); MY_HASH_ADD(m1, m2, (uint) ((wc >> 16) & 0xFF));
} }
s+= res; s+= res;
} }
*nr1= m1;
*nr2= m2;
} }
......
...@@ -100,4 +100,20 @@ static inline const uchar *skip_trailing_space(const uchar *ptr,size_t len) ...@@ -100,4 +100,20 @@ static inline const uchar *skip_trailing_space(const uchar *ptr,size_t len)
end--; end--;
return (end); return (end);
} }
/*
  Macros for hashing characters.

  MY_HASH_ADD(A, B, value)
    Folds one value into the running hash state.
    A      lvalue holding the hash accumulator; updated in place.
    B      lvalue holding the second state word; incremented by 3.
    value  the value to mix in (callers typically pass one byte or weight).

  All arguments are fully parenthesized so that expression arguments
  (for example "x | y") expand with the intended precedence.
  Note that A and B are evaluated more than once, and the 16-bit variants
  evaluate 'value' twice: do not pass expressions with side effects.
*/
#define MY_HASH_ADD(A, B, value) \
  do { (A)^= ((((A) & 63) + (B)) * (value)) + ((A) << 8); (B)+= 3; } while(0)

/*
  Hash a 16-bit value as two MY_HASH_ADD steps: first the low byte
  (value & 0xFF), then the remaining high part (value >> 8, not masked).
*/
#define MY_HASH_ADD_16(A, B, value) \
  do { MY_HASH_ADD(A, B, ((value) & 0xFF)); MY_HASH_ADD(A, B, ((value) >> 8)); } while(0)

/*
  This one is needed to ensure we get the exact same hash as MariaDB 5.1
  This is needed to ensure that old partitioned tables still work as before.

  Same as MY_HASH_ADD_16 but in the opposite order: first the high part
  (value >> 8), then the low byte (value & 0xFF).
*/
#define MY_HASH_ADD_16_INV(A, B, value) \
  do { MY_HASH_ADD(A, B, ((value) >> 8)); MY_HASH_ADD(A, B, ((value) & 0xFF)); } while(0)
#endif #endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment