Commit 6af72409 authored by unknown's avatar unknown

BUG#9622, stage 2, work together with fix for BUG#12232:

added "nulls_ignored" index statistics collection method for MyISAM tables.
(notification trigger: this is about BUG#9622).


include/my_base.h:
  BUG#9622: Added MI_STATS_METHOD_IGNORE_NULLS statistics collection method:
  Added SEARCH_RETURN_B_POS flag for ha_key_cmp()
include/my_handler.h:
  BUG#9622: Added MI_STATS_METHOD_IGNORE_NULLS statistics collection method: added ha_find_null()
include/myisam.h:
  BUG#9622: Added MI_STATS_METHOD_IGNORE_NULLS statistics collection method.
myisam/mi_check.c:
  BUG#9622: Added MI_STATS_METHOD_IGNORE_NULLS statistics collection method, added 
   mi_collect_stats_*(), updated update_key_parts() to deal with all 3 methods.
myisam/myisamchk.c:
  BUG#9622: Added nulls_ignored index statistics collection method for MyISAM
myisam/myisamdef.h:
  BUG#9622: Added MI_STATS_METHOD_IGNORE_NULLS statistics collection method.
myisam/sort.c:
  BUG#9622: Added MI_STATS_METHOD_IGNORE_NULLS statistics collection method.
mysql-test/r/myisam.result:
  Testcase for BUG9622
mysql-test/t/myisam.test:
  Testcase for BUG9622
mysys/my_handler.c:
  BUG#9622: ha_key_cmp() now supports new SEARCH_RETURN_B_POS flag, added ha_find_null()
sql/ha_myisam.cc:
  BUG#9622: Added MI_STATS_METHOD_IGNORE_NULLS statistics collection method.
sql/mysqld.cc:
  BUG#9622: Added MI_STATS_METHOD_IGNORE_NULLS statistics collection method.
parent 3765ae2d
...@@ -319,6 +319,8 @@ enum ha_base_keytype { ...@@ -319,6 +319,8 @@ enum ha_base_keytype {
#define SEARCH_NULL_ARE_EQUAL 32768 /* NULL in keys are equal */ #define SEARCH_NULL_ARE_EQUAL 32768 /* NULL in keys are equal */
#define SEARCH_NULL_ARE_NOT_EQUAL 65536 /* NULL in keys are not equal */ #define SEARCH_NULL_ARE_NOT_EQUAL 65536 /* NULL in keys are not equal */
#define SEARCH_RETURN_B_POS (65536*2) /* see ha_key_cmp for description */
/* bits in opt_flag */ /* bits in opt_flag */
#define QUICK_USED 1 #define QUICK_USED 1
#define READ_CACHE_USED 2 #define READ_CACHE_USED 2
......
...@@ -63,4 +63,6 @@ extern int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, ...@@ -63,4 +63,6 @@ extern int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a,
register uchar *b, uint key_length, uint nextflag, register uchar *b, uint key_length, uint nextflag,
uint *diff_pos); uint *diff_pos);
extern HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a);
#endif /* _my_handler_h */ #endif /* _my_handler_h */
...@@ -322,7 +322,9 @@ typedef enum ...@@ -322,7 +322,9 @@ typedef enum
/* Treat NULLs as inequal when collecting statistics (default for 4.1/5.0) */ /* Treat NULLs as inequal when collecting statistics (default for 4.1/5.0) */
MI_STATS_METHOD_NULLS_NOT_EQUAL, MI_STATS_METHOD_NULLS_NOT_EQUAL,
/* Treat NULLs as equal when collecting statistics (like 4.0 did) */ /* Treat NULLs as equal when collecting statistics (like 4.0 did) */
MI_STATS_METHOD_NULLS_EQUAL MI_STATS_METHOD_NULLS_EQUAL,
/* Ignore NULLs - count tuples without NULLs only */
MI_STATS_METHOD_IGNORE_NULLS
} enum_mi_stats_method; } enum_mi_stats_method;
typedef struct st_mi_check_param typedef struct st_mi_check_param
...@@ -349,7 +351,14 @@ typedef struct st_mi_check_param ...@@ -349,7 +351,14 @@ typedef struct st_mi_check_param
int tmpfile_createflag; int tmpfile_createflag;
myf myf_rw; myf myf_rw;
IO_CACHE read_cache; IO_CACHE read_cache;
/*
The next two are used to collect statistics, see update_key_parts for
description.
*/
ulonglong unique_count[MI_MAX_KEY_SEG+1]; ulonglong unique_count[MI_MAX_KEY_SEG+1];
ulonglong notnull_count[MI_MAX_KEY_SEG+1];
ha_checksum key_crc[MI_MAX_POSSIBLE_KEY]; ha_checksum key_crc[MI_MAX_POSSIBLE_KEY];
ulong rec_per_key_part[MI_MAX_KEY_SEG*MI_MAX_POSSIBLE_KEY]; ulong rec_per_key_part[MI_MAX_KEY_SEG*MI_MAX_POSSIBLE_KEY];
void *thd; void *thd;
...@@ -409,7 +418,8 @@ void update_auto_increment_key(MI_CHECK *param, MI_INFO *info, ...@@ -409,7 +418,8 @@ void update_auto_increment_key(MI_CHECK *param, MI_INFO *info,
my_bool repair); my_bool repair);
int update_state_info(MI_CHECK *param, MI_INFO *info,uint update); int update_state_info(MI_CHECK *param, MI_INFO *info,uint update);
void update_key_parts(MI_KEYDEF *keyinfo, ulong *rec_per_key_part, void update_key_parts(MI_KEYDEF *keyinfo, ulong *rec_per_key_part,
ulonglong *unique, ulonglong records); ulonglong *unique, ulonglong *notnull,
ulonglong records);
int filecopy(MI_CHECK *param, File to,File from,my_off_t start, int filecopy(MI_CHECK *param, File to,File from,my_off_t start,
my_off_t length, const char *type); my_off_t length, const char *type);
int movepoint(MI_INFO *info,byte *record,my_off_t oldpos, int movepoint(MI_INFO *info,byte *record,my_off_t oldpos,
......
...@@ -391,7 +391,10 @@ int chk_key(MI_CHECK *param, register MI_INFO *info) ...@@ -391,7 +391,10 @@ int chk_key(MI_CHECK *param, register MI_INFO *info)
found_keys++; found_keys++;
param->record_checksum=init_checksum; param->record_checksum=init_checksum;
bzero((char*) &param->unique_count,sizeof(param->unique_count)); bzero((char*) &param->unique_count,sizeof(param->unique_count));
bzero((char*) &param->notnull_count,sizeof(param->notnull_count));
if ((!(param->testflag & T_SILENT))) if ((!(param->testflag & T_SILENT)))
printf ("- check data record references index: %d\n",key+1); printf ("- check data record references index: %d\n",key+1);
if (keyinfo->flag & HA_FULLTEXT) if (keyinfo->flag & HA_FULLTEXT)
...@@ -496,7 +499,9 @@ int chk_key(MI_CHECK *param, register MI_INFO *info) ...@@ -496,7 +499,9 @@ int chk_key(MI_CHECK *param, register MI_INFO *info)
if (param->testflag & T_STATISTICS) if (param->testflag & T_STATISTICS)
update_key_parts(keyinfo, rec_per_key_part, param->unique_count, update_key_parts(keyinfo, rec_per_key_part, param->unique_count,
(ulonglong) info->state->records); param->stats_method == MI_STATS_METHOD_IGNORE_NULLS?
param->notnull_count: NULL,
(ulonglong)info->state->records);
} }
if (param->testflag & T_INFO) if (param->testflag & T_INFO)
{ {
...@@ -552,6 +557,87 @@ static int chk_index_down(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, ...@@ -552,6 +557,87 @@ static int chk_index_down(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo,
return 1; return 1;
} }
/*
  "Ignore NULLs" statistics collection: account for the very first index
  tuple of a scan.

  SYNOPSIS
    mi_collect_stats_nonulls_first()
      keyseg   IN     Array of key part descriptions
      notnull  INOUT  Array of counters; notnull[i] is the number of
                      {keypart1...keypart_i} prefix tuples seen so far that
                      contain no NULLs
      key      IN     Key values tuple

  DESCRIPTION
    Locates the first key part of 'key' that holds NULL (via ha_find_null())
    and bumps the counter of every prefix that ends before that key part.
    Prefixes that include the NULL key part are left untouched.
*/

static
void mi_collect_stats_nonulls_first(HA_KEYSEG *keyseg, ulonglong *notnull,
                                    uchar *key)
{
  /* Index of the first NULL key part (== number of NULL-free prefixes). */
  uint null_part= (uint) (ha_find_null(keyseg, key) - keyseg);
  ulonglong *counter= notnull;
  ulonglong *counters_end= notnull + null_part;

  for (; counter != counters_end; counter++)
    (*counter)++;
}
/*
  "Ignore NULLs" statistics collection method: process next index tuple.

  SYNOPSIS
    mi_collect_stats_nonulls_next()
      keyseg   IN     Array of key part descriptions
      notnull  INOUT  Array, notnull[i] = (number of {keypart1...keypart_i}
                      tuples that don't contain NULLs)
      prev_key IN     Previous key values tuple
      last_key IN     Next key values tuple

  DESCRIPTION
    Process the next index tuple:
    1. Find out which prefix tuples of last_key don't contain NULLs, and
       update the array of notnull counters accordingly.
    2. Find the first keypart number where the tuples are different(A), or
       last_key has NULL value (B), and return it, so caller can count
       number of unique tuples for each key prefix. We don't need (B) to be
       counted, and that is compensated back in update_key_parts().

  RETURN
    1 + number of first keypart where values differ or last_key tuple has NULL
*/

static
int mi_collect_stats_nonulls_next(HA_KEYSEG *keyseg, ulonglong *notnull,
                                  uchar *prev_key, uchar *last_key)
{
  uint diffs[2];
  uint first_null_seg, kp;
  /*
    Declared here rather than at first use: the rest of this file keeps all
    declarations at the start of the block, which pre-C99 compilers require.
  */
  HA_KEYSEG *seg;

  /*
    Find first keypart where values are different or either of them is NULL.
    With SEARCH_RETURN_B_POS the result is a pair:
      diffs[0]= 1 + number of the first differing keypart
      diffs[1]= offset such that (last_key + diffs[1]) points to the value
                of that keypart inside last_key
  */
  ha_key_cmp(keyseg, prev_key, last_key, USE_WHOLE_KEY,
             SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL | SEARCH_RETURN_B_POS,
             diffs);
  seg= keyseg + diffs[0] - 1;

  /*
    Find first NULL in last_key. The key parts before 'seg' compared equal
    under SEARCH_NULL_ARE_NOT_EQUAL, so the search starts at 'seg'.
  */
  first_null_seg= ha_find_null(seg, last_key + diffs[1]) - keyseg;
  for (kp= 0; kp < first_null_seg; kp++)
    notnull[kp]++;

  /*
    Return 1+ number of first key part where values differ. Don't care if
    these were NULLs and not .... We compensate for that in
    update_key_parts.
  */
  return diffs[0];
}
/* Check if index is ok */ /* Check if index is ok */
static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo,
...@@ -641,8 +727,20 @@ static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, ...@@ -641,8 +727,20 @@ static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo,
ha_key_cmp(keyinfo->seg,info->lastkey,key,USE_WHOLE_KEY, ha_key_cmp(keyinfo->seg,info->lastkey,key,USE_WHOLE_KEY,
SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL,
&diff_pos); &diff_pos);
else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
{
diff_pos= mi_collect_stats_nonulls_next(keyinfo->seg,
param->notnull_count,
info->lastkey, key);
}
param->unique_count[diff_pos-1]++; param->unique_count[diff_pos-1]++;
} }
else
{
if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
mi_collect_stats_nonulls_first(keyinfo->seg, param->notnull_count,
key);
}
} }
(*key_checksum)+= mi_byte_checksum((byte*) key, (*key_checksum)+= mi_byte_checksum((byte*) key,
key_length- info->s->rec_reflength); key_length- info->s->rec_reflength);
...@@ -2088,7 +2186,8 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, ...@@ -2088,7 +2186,8 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info,
if (param->testflag & T_STATISTICS) if (param->testflag & T_STATISTICS)
update_key_parts(sort_param.keyinfo, rec_per_key_part, sort_param.unique, update_key_parts(sort_param.keyinfo, rec_per_key_part, sort_param.unique,
(ulonglong) info->state->records); param->stats_method == MI_STATS_METHOD_IGNORE_NULLS?
sort_param.notnull: NULL,(ulonglong) info->state->records);
share->state.key_map|=(ulonglong) 1 << sort_param.key; share->state.key_map|=(ulonglong) 1 << sort_param.key;
if (sort_param.fix_datafile) if (sort_param.fix_datafile)
...@@ -3255,11 +3354,21 @@ static int sort_key_write(MI_SORT_PARAM *sort_param, const void *a) ...@@ -3255,11 +3354,21 @@ static int sort_key_write(MI_SORT_PARAM *sort_param, const void *a)
ha_key_cmp(sort_param->seg,sort_info->key_block->lastkey, ha_key_cmp(sort_param->seg,sort_info->key_block->lastkey,
(uchar*) a, USE_WHOLE_KEY, (uchar*) a, USE_WHOLE_KEY,
SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, &diff_pos); SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, &diff_pos);
else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
{
diff_pos= mi_collect_stats_nonulls_next(sort_param->seg,
sort_param->notnull,
sort_info->key_block->lastkey,
(uchar*)a);
}
sort_param->unique[diff_pos-1]++; sort_param->unique[diff_pos-1]++;
} }
else else
{ {
cmp= -1; cmp= -1;
if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
mi_collect_stats_nonulls_first(sort_param->seg, sort_param->notnull,
(uchar*)a);
} }
if ((sort_param->keyinfo->flag & HA_NOSAME) && cmp == 0) if ((sort_param->keyinfo->flag & HA_NOSAME) && cmp == 0)
{ {
...@@ -3981,21 +4090,30 @@ void update_auto_increment_key(MI_CHECK *param, MI_INFO *info, ...@@ -3981,21 +4090,30 @@ void update_auto_increment_key(MI_CHECK *param, MI_INFO *info,
SYNOPSIS SYNOPSIS
update_key_parts() update_key_parts()
keyinfo Index information (only key->keysegs used) keyinfo IN Index information (only key->keysegs used)
rec_per_key_part OUT Store statistics here rec_per_key_part OUT Store statistics here
unique IN Array of #distinct values collected over index unique IN Array of (#distinct tuples)
run. notnull_tuples IN Array of (#tuples), or NULL
records Number of records in the table records Number of records in the table
NOTES NOTES
This function handles all 3 index statistics collection methods.
Unique is an array: Unique is an array:
unique[0]= (#different values of {keypart1}) - 1 unique[0]= (#different values of {keypart1}) - 1
unique[1]= (#different values of {keypart2,keypart1} tuple) - unique[0] - 1 unique[1]= (#different values of {keypart1,keypart2} tuple) - unique[0] - 1
... ...
For MI_STATS_METHOD_IGNORE_NULLS notnull_tuples is an array too:
notnull_tuples[0]= (# of {keypart1} tuples such that keypart1 is not NULL)
notnull_tuples[1]= (# of {keypart1,keypart2} tuples such that all
keypart{i} are not NULL)
...
For all other statistics collection methods notnull_tuples=NULL.
The 'unique' array is collected in one sequential scan through the entire The 'unique' array is collected in one sequential scan through the entire
index. This is done in two places: in chk_index() and in sort_key_write(). index. This is done in two places: in chk_index() and in sort_key_write().
Statistics collection may consider NULLs as either equal or unequal (see notnull_tuples, if present, is collected during the same index scan.
SEARCH_NULL_ARE_NOT_EQUAL, MI_STATS_METHOD_*).
Output is an array: Output is an array:
rec_per_key_part[k] = rec_per_key_part[k] =
...@@ -4007,25 +4125,53 @@ void update_auto_increment_key(MI_CHECK *param, MI_INFO *info, ...@@ -4007,25 +4125,53 @@ void update_auto_increment_key(MI_CHECK *param, MI_INFO *info,
index tuples} index tuples}
= #tuples-in-the-index / #distinct-tuples-in-the-index. = #tuples-in-the-index / #distinct-tuples-in-the-index.
The #tuples-in-the-index and #distinct-tuples-in-the-index have different
meaning depending on which statistics collection method is used:
MI_STATS_METHOD_* how are nulls compared? which tuples are counted?
NULLS_EQUAL NULL == NULL all tuples in table
NULLS_NOT_EQUAL NULL != NULL all tuples in table
IGNORE_NULLS n/a tuples that don't have NULLs
*/ */
void update_key_parts(MI_KEYDEF *keyinfo, ulong *rec_per_key_part, void update_key_parts(MI_KEYDEF *keyinfo, ulong *rec_per_key_part,
ulonglong *unique, ulonglong records) ulonglong *unique, ulonglong *notnull,
ulonglong records)
{ {
ulonglong count=0,tmp; ulonglong count=0,tmp, unique_tuples;
ulonglong tuples= records;
uint parts; uint parts;
for (parts=0 ; parts < keyinfo->keysegs ; parts++) for (parts=0 ; parts < keyinfo->keysegs ; parts++)
{ {
count+=unique[parts]; count+=unique[parts];
if (count == 0) unique_tuples= count + 1;
tmp=records; if (notnull)
{
tuples= notnull[parts];
/*
#(unique_tuples not counting tuples with NULLs) =
#(unique_tuples counting tuples with NULLs as different) -
#(tuples with NULLs)
*/
unique_tuples -= (records - notnull[parts]);
}
if (unique_tuples == 0)
tmp= 1;
else if (count == 0)
tmp= tuples; /* 1 unique tuple */
else else
tmp= (records + (count+1)/2) / (count+1); tmp= (tuples + unique_tuples/2) / unique_tuples;
/* for some weird keys (e.g. FULLTEXT) tmp can be <1 here.
let's ensure it is not */ /*
for some weird keys (e.g. FULLTEXT) tmp can be <1 here.
let's ensure it is not
*/
set_if_bigger(tmp,1); set_if_bigger(tmp,1);
if (tmp >= (ulonglong) ~(ulong) 0) if (tmp >= (ulonglong) ~(ulong) 0)
tmp=(ulonglong) ~(ulong) 0; tmp=(ulonglong) ~(ulong) 0;
*rec_per_key_part=(ulong) tmp; *rec_per_key_part=(ulong) tmp;
rec_per_key_part++; rec_per_key_part++;
} }
......
...@@ -339,7 +339,8 @@ static struct my_option my_long_options[] = ...@@ -339,7 +339,8 @@ static struct my_option my_long_options[] =
REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
{"stats_method", OPT_STATS_METHOD, {"stats_method", OPT_STATS_METHOD,
"Specifies how index statistics collection code should threat NULLs. " "Specifies how index statistics collection code should threat NULLs. "
"Possible values of name are \"nulls_unequal\" (default behavior for 4.1/5.0), and \"nulls_equal\" (emulate 4.0 behavior).", "Possible values of name are \"nulls_unequal\" (default behavior for 4.1/5.0), "
"\"nulls_equal\" (emulate 4.0 behavior), and \"nulls_ignored\".",
(gptr*) &myisam_stats_method_str, (gptr*) &myisam_stats_method_str, 0, (gptr*) &myisam_stats_method_str, (gptr*) &myisam_stats_method_str, 0,
GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
{ 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
...@@ -451,6 +452,10 @@ static void usage(void) ...@@ -451,6 +452,10 @@ static void usage(void)
-a, --analyze Analyze distribution of keys. Will make some joins in\n\ -a, --analyze Analyze distribution of keys. Will make some joins in\n\
MySQL faster. You can check the calculated distribution\n\ MySQL faster. You can check the calculated distribution\n\
by using '--description --verbose table_name'.\n\ by using '--description --verbose table_name'.\n\
--stats_method=name Specifies how index statistics collection code should\n\
threat NULLs. Possible values of name are \"nulls_unequal\"\n\
(default for 4.1/5.0), \"nulls_equal\" (emulate 4.0), and \n\
\"nulls_ignored\".\n\
-d, --description Prints some information about table.\n\ -d, --description Prints some information about table.\n\
-A, --set-auto-increment[=value]\n\ -A, --set-auto-increment[=value]\n\
Force auto_increment to start at this or higher value\n\ Force auto_increment to start at this or higher value\n\
...@@ -472,7 +477,7 @@ static void usage(void) ...@@ -472,7 +477,7 @@ static void usage(void)
#include <help_end.h> #include <help_end.h>
const char *myisam_stats_method_names[] = {"nulls_unequal", "nulls_equal", const char *myisam_stats_method_names[] = {"nulls_unequal", "nulls_equal",
NullS}; "nulls_ignored", NullS};
TYPELIB myisam_stats_method_typelib= { TYPELIB myisam_stats_method_typelib= {
array_elements(myisam_stats_method_names) - 1, "", array_elements(myisam_stats_method_names) - 1, "",
myisam_stats_method_names, NULL}; myisam_stats_method_names, NULL};
...@@ -699,14 +704,25 @@ get_one_option(int optid, ...@@ -699,14 +704,25 @@ get_one_option(int optid,
case OPT_STATS_METHOD: case OPT_STATS_METHOD:
{ {
int method; int method;
enum_mi_stats_method method_conv;
myisam_stats_method_str= argument; myisam_stats_method_str= argument;
if ((method=find_type(argument, &myisam_stats_method_typelib, 2)) <= 0) if ((method=find_type(argument, &myisam_stats_method_typelib, 2)) <= 0)
{ {
fprintf(stderr, "Invalid value of stats_method: %s.\n", argument); fprintf(stderr, "Invalid value of stats_method: %s.\n", argument);
exit(1); exit(1);
} }
check_param.stats_method= test(method-1)? MI_STATS_METHOD_NULLS_EQUAL : switch (method-1) {
MI_STATS_METHOD_NULLS_NOT_EQUAL; case 0:
method_conv= MI_STATS_METHOD_NULLS_EQUAL;
break;
case 1:
method_conv= MI_STATS_METHOD_NULLS_NOT_EQUAL;
break;
case 2:
method_conv= MI_STATS_METHOD_IGNORE_NULLS;
break;
}
check_param.stats_method= method_conv;
break; break;
} }
#ifdef DEBUG /* Only useful if debugging */ #ifdef DEBUG /* Only useful if debugging */
......
...@@ -297,7 +297,14 @@ typedef struct st_mi_sort_param ...@@ -297,7 +297,14 @@ typedef struct st_mi_sort_param
pthread_t thr; pthread_t thr;
IO_CACHE read_cache, tempfile, tempfile_for_exceptions; IO_CACHE read_cache, tempfile, tempfile_for_exceptions;
DYNAMIC_ARRAY buffpek; DYNAMIC_ARRAY buffpek;
/*
The next two are used to collect statistics, see update_key_parts for
description.
*/
ulonglong unique[MI_MAX_KEY_SEG+1]; ulonglong unique[MI_MAX_KEY_SEG+1];
ulonglong notnull[MI_MAX_KEY_SEG+1];
my_off_t pos,max_pos,filepos,start_recpos; my_off_t pos,max_pos,filepos,start_recpos;
uint key, key_length,real_key_length,sortbuff_size; uint key, key_length,real_key_length,sortbuff_size;
uint maxbuffers, keys, find_length, sort_keys_length; uint maxbuffers, keys, find_length, sort_keys_length;
......
...@@ -481,8 +481,12 @@ int thr_write_keys(MI_SORT_PARAM *sort_param) ...@@ -481,8 +481,12 @@ int thr_write_keys(MI_SORT_PARAM *sort_param)
{ {
share->state.key_map|=(ulonglong) 1 << sinfo->key; share->state.key_map|=(ulonglong) 1 << sinfo->key;
if (param->testflag & T_STATISTICS) if (param->testflag & T_STATISTICS)
update_key_parts(sinfo->keyinfo, rec_per_key_part, update_key_parts(sinfo->keyinfo, rec_per_key_part, sinfo->unique,
sinfo->unique, (ulonglong) info->state->records); param->stats_method == MI_STATS_METHOD_IGNORE_NULLS?
sinfo->notnull: NULL,
(ulonglong) info->state->records);
if (!sinfo->buffpek.elements) if (!sinfo->buffpek.elements)
{ {
if (param->testflag & T_VERBOSE) if (param->testflag & T_VERBOSE)
......
...@@ -670,3 +670,35 @@ show index from t1; ...@@ -670,3 +670,35 @@ show index from t1;
Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
t1 1 a 1 a A 10 NULL NULL YES BTREE t1 1 a 1 a A 10 NULL NULL YES BTREE
drop table t1; drop table t1;
set myisam_stats_method=nulls_ignored;
show variables like 'myisam_stats_method';
Variable_name Value
myisam_stats_method nulls_ignored
create table t1 (
a char(3), b char(4), c char(5), d char(6),
key(a,b,c,d)
);
insert into t1 values ('bcd','def1', NULL, 'zz');
insert into t1 values ('bcd','def2', NULL, 'zz');
insert into t1 values ('bce','def1', 'yuu', NULL);
insert into t1 values ('bce','def2', NULL, 'quux');
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status OK
show index from t1;
Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
t1 1 a 1 a A 2 NULL NULL YES BTREE
t1 1 a 2 b A 4 NULL NULL YES BTREE
t1 1 a 3 c A 4 NULL NULL YES BTREE
t1 1 a 4 d A 4 NULL NULL YES BTREE
delete from t1;
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status OK
show index from t1;
Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment
t1 1 a 1 a A 0 NULL NULL YES BTREE
t1 1 a 2 b A 0 NULL NULL YES BTREE
t1 1 a 3 c A 0 NULL NULL YES BTREE
t1 1 a 4 d A 0 NULL NULL YES BTREE
set myisam_stats_method=DEFAULT;
...@@ -637,4 +637,23 @@ show index from t1; ...@@ -637,4 +637,23 @@ show index from t1;
drop table t1; drop table t1;
# WL#2609, CSC#XXXX: MyISAM
set myisam_stats_method=nulls_ignored;
show variables like 'myisam_stats_method';
create table t1 (
a char(3), b char(4), c char(5), d char(6),
key(a,b,c,d)
);
insert into t1 values ('bcd','def1', NULL, 'zz');
insert into t1 values ('bcd','def2', NULL, 'zz');
insert into t1 values ('bce','def1', 'yuu', NULL);
insert into t1 values ('bce','def2', NULL, 'quux');
analyze table t1;
show index from t1;
delete from t1;
analyze table t1;
show index from t1;
set myisam_stats_method=DEFAULT;
# End of 4.1 tests # End of 4.1 tests
...@@ -75,7 +75,7 @@ static int compare_bin(uchar *a, uint a_length, uchar *b, uint b_length, ...@@ -75,7 +75,7 @@ static int compare_bin(uchar *a, uint a_length, uchar *b, uint b_length,
SYNOPSIS SYNOPSIS
ha_key_cmp() ha_key_cmp()
keyseg Key segments of key to compare keyseg Array of key segments of key to compare
a First key to compare, in format from _mi_pack_key() a First key to compare, in format from _mi_pack_key()
This is normally key specified by user This is normally key specified by user
b Second key to compare. This is always from a row b Second key to compare. This is always from a row
...@@ -84,10 +84,20 @@ static int compare_bin(uchar *a, uint a_length, uchar *b, uint b_length, ...@@ -84,10 +84,20 @@ static int compare_bin(uchar *a, uint a_length, uchar *b, uint b_length,
next_flag How keys should be compared next_flag How keys should be compared
If bit SEARCH_FIND is not set the keys includes the row If bit SEARCH_FIND is not set the keys includes the row
position and this should also be compared position and this should also be compared
diff_pos OUT Number of first keypart where values differ, counting
from one.
NOTES NOTES
Number-keys can't be splited Number-keys can't be splited
DESCRIPTION
If the SEARCH_RETURN_B_POS flag is set, diff_pos must point to an array of 2
values. The first value has the meaning described above; the second value is:
diff_pos[1] OUT (b + diff_pos[1]) points to the first value in tuple b
that is different from the corresponding value in tuple a.
RETURN VALUES RETURN VALUES
<0 If a < b <0 If a < b
0 If a == b 0 If a == b
...@@ -107,6 +117,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, ...@@ -107,6 +117,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a,
float f_1,f_2; float f_1,f_2;
double d_1,d_2; double d_1,d_2;
uint next_key_length; uint next_key_length;
uchar *orig_b= b;
*diff_pos=0; *diff_pos=0;
for ( ; (int) key_length >0 ; key_length=next_key_length, keyseg++) for ( ; (int) key_length >0 ; key_length=next_key_length, keyseg++)
...@@ -115,6 +126,9 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, ...@@ -115,6 +126,9 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a,
uint piks=! (keyseg->flag & HA_NO_SORT); uint piks=! (keyseg->flag & HA_NO_SORT);
(*diff_pos)++; (*diff_pos)++;
if (nextflag & SEARCH_RETURN_B_POS)
diff_pos[1]= (uint)(b - orig_b);
/* Handle NULL part */ /* Handle NULL part */
if (keyseg->null_bit) if (keyseg->null_bit)
{ {
...@@ -448,3 +462,84 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, ...@@ -448,3 +462,84 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a,
} }
return 0; return 0;
} /* ha_key_cmp */ } /* ha_key_cmp */
/*
  Locate the first NULL value in an index-suffix values tuple

  SYNOPSIS
    ha_find_null()
      keyseg     Array of key parts describing the key suffix, terminated by
                 an element with type HA_KEYTYPE_END
      a          Key suffix value tuple

  DESCRIPTION
    Walks the key parts and the value tuple in parallel, skipping over each
    stored value, until a key part whose value is NULL is found.

    TODO Consider optimizing this function or its use so we don't search for
         NULL values in completely NOT NULL index suffixes.

  RETURN
    First key part that has NULL as value in values tuple, or the last key part
    (with keyseg->type==HA_KEYTYPE_END) if values tuple doesn't contain NULLs.
*/

HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a)
{
  while ((enum ha_base_keytype) keyseg->type != HA_KEYTYPE_END)
  {
    uchar *fixed_end;
    int packed_len;

    /*
      A nullable key part stores one marker byte ahead of the value;
      a zero marker means the value is NULL.
    */
    if (keyseg->null_bit && !*a++)
      return keyseg;

    /* Where the value ends if it occupies exactly keyseg->length bytes */
    fixed_end= a + keyseg->length;

    switch ((enum ha_base_keytype) keyseg->type) {
    case HA_KEYTYPE_TEXT:
    case HA_KEYTYPE_BINARY:
      if (!(keyseg->flag & HA_SPACE_PACK))
      {
        a= fixed_end;
        break;
      }
      /* fall through: a space-packed value carries a length prefix */
    case HA_KEYTYPE_VARTEXT:
    case HA_KEYTYPE_VARBINARY:
      get_key_length(packed_len, a);
      a+= packed_len;
      break;
    case HA_KEYTYPE_NUM:
      if (keyseg->flag & HA_SPACE_PACK)
      {
        /* One length byte followed by that many bytes of value */
        packed_len= *a++;
        a+= packed_len;
      }
      else
        a= fixed_end;
      break;
    case HA_KEYTYPE_INT8:
    case HA_KEYTYPE_SHORT_INT:
    case HA_KEYTYPE_USHORT_INT:
    case HA_KEYTYPE_LONG_INT:
    case HA_KEYTYPE_ULONG_INT:
    case HA_KEYTYPE_INT24:
    case HA_KEYTYPE_UINT24:
#ifdef HAVE_LONG_LONG
    case HA_KEYTYPE_LONGLONG:
    case HA_KEYTYPE_ULONGLONG:
#endif
    case HA_KEYTYPE_FLOAT:
    case HA_KEYTYPE_DOUBLE:
      a= fixed_end;
      break;
    }
    keyseg++;
  }
  return keyseg;
}
...@@ -40,7 +40,7 @@ TYPELIB myisam_recover_typelib= {array_elements(myisam_recover_names)-1,"", ...@@ -40,7 +40,7 @@ TYPELIB myisam_recover_typelib= {array_elements(myisam_recover_names)-1,"",
myisam_recover_names, NULL}; myisam_recover_names, NULL};
const char *myisam_stats_method_names[] = {"nulls_unequal", "nulls_equal", const char *myisam_stats_method_names[] = {"nulls_unequal", "nulls_equal",
NullS}; "nulls_ignored", NullS};
TYPELIB myisam_stats_method_typelib= { TYPELIB myisam_stats_method_typelib= {
array_elements(myisam_stats_method_names) - 1, "", array_elements(myisam_stats_method_names) - 1, "",
myisam_stats_method_names, NULL}; myisam_stats_method_names, NULL};
......
...@@ -5212,7 +5212,8 @@ The minimum value for this variable is 4096.", ...@@ -5212,7 +5212,8 @@ The minimum value for this variable is 4096.",
GET_ULONG, REQUIRED_ARG, 8192*1024, 4, ~0L, 0, 1, 0}, GET_ULONG, REQUIRED_ARG, 8192*1024, 4, ~0L, 0, 1, 0},
{"myisam_stats_method", OPT_MYISAM_STATS_METHOD, {"myisam_stats_method", OPT_MYISAM_STATS_METHOD,
"Specifies how MyISAM index statistics collection code should threat NULLs. " "Specifies how MyISAM index statistics collection code should threat NULLs. "
"Possible values of name are \"nulls_unequal\" (default behavior for 4.1/5.0), and \"nulls_equal\" (emulate 4.0 behavior).", "Possible values of name are \"nulls_unequal\" (default behavior for 4.1/5.0), "
"\"nulls_equal\" (emulate 4.0 behavior), and \"nulls_ignored\".",
(gptr*) &myisam_stats_method_str, (gptr*) &myisam_stats_method_str, 0, (gptr*) &myisam_stats_method_str, (gptr*) &myisam_stats_method_str, 0,
GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
{"net_buffer_length", OPT_NET_BUFFER_LENGTH, {"net_buffer_length", OPT_NET_BUFFER_LENGTH,
...@@ -6405,16 +6406,26 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), ...@@ -6405,16 +6406,26 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
} }
case OPT_MYISAM_STATS_METHOD: case OPT_MYISAM_STATS_METHOD:
{ {
myisam_stats_method_str= argument;
int method; int method;
ulong method_conv;
myisam_stats_method_str= argument;
if ((method=find_type(argument, &myisam_stats_method_typelib, 2)) <= 0) if ((method=find_type(argument, &myisam_stats_method_typelib, 2)) <= 0)
{ {
fprintf(stderr, "Invalid value of myisam_stats_method: %s.\n", argument); fprintf(stderr, "Invalid value of myisam_stats_method: %s.\n", argument);
exit(1); exit(1);
} }
global_system_variables.myisam_stats_method= switch (method-1) {
test(method-1)? MI_STATS_METHOD_NULLS_EQUAL : case 0:
MI_STATS_METHOD_NULLS_NOT_EQUAL; method_conv= MI_STATS_METHOD_NULLS_EQUAL;
break;
case 1:
method_conv= MI_STATS_METHOD_NULLS_NOT_EQUAL;
break;
case 2:
method_conv= MI_STATS_METHOD_IGNORE_NULLS;
break;
}
global_system_variables.myisam_stats_method= method_conv;
break; break;
} }
case OPT_SQL_MODE: case OPT_SQL_MODE:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment