/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #define NO_YACC_SYMBOLS #include <my_global.h> #include <my_sys.h> #include <m_string.h> #ifndef __GNU_LIBRARY__ #define __GNU_LIBRARY__ // Skipp warnings in getopt.h #endif #include <getopt.h> #include "mysql_version.h" #include "lex.h" bool opt_search=0; int opt_verbose=0; ulong opt_count=100000; #define max_allowed_array 8000 // Don't generate bigger arrays than this #define max_symbol 32767 // Use this for 'not found' #define how_much_for_plus 8 // 2-8 #define type_count 1 // 1-5 #define char_table_count 5 #define total_symbols (sizeof(symbols)/sizeof(SYMBOL) +\ sizeof(sql_functions)/sizeof(SYMBOL)) #define how_much_and INT_MAX24 /* The following only have to work with characters in the set used by SQL commands */ #undef tolower #define tolower(a) ((a) >= 'A' && (a) <= 'Z') ? ((a)- 'A' + 'a') : (a) static uint how_long_symbols,function_plus,function_mod,function_type; static uint char_table[256]; static uchar unique_length[256]; static uchar bits[how_much_and/8+1]; static uint primes[max_allowed_array+1]; static ulong hash_results[type_count][how_much_for_plus+1][total_symbols]; static ulong start_value=0; struct rand_struct { unsigned long seed1,seed2,max_value; double max_value_dbl; }; void randominit(struct rand_struct *rand_st,ulong seed1, ulong seed2) { /* For mysql 3.21.# */ rand_st->max_value= 0x3FFFFFFFL; rand_st->max_value_dbl=(double) rand_st->max_value; rand_st->seed1=seed1%rand_st->max_value ; rand_st->seed2=seed2%rand_st->max_value; } double rnd(struct rand_struct *rand_st) { rand_st->seed1=(rand_st->seed1*3+rand_st->seed2) % rand_st->max_value; rand_st->seed2=(rand_st->seed1+rand_st->seed2+33) % rand_st->max_value; return (((double) rand_st->seed1)/rand_st->max_value_dbl); } static void make_char_table(ulong t1,ulong t2,int type) { uint i; struct rand_struct rand_st; randominit(&rand_st,t1,t2); for (i=0 ; i < 256 ; i++) { switch (type) { case 0: char_table[i]= i + (i << 8); break; case 1: char_table[i]= i + ((i ^255 ) << 8); break; case 2: char_table[i]= i; break; case 3: char_table[i]= i + ((uint) (rnd(&rand_st)*255) << 8); break; case 4: char_table[i]= (uint) (rnd(&rand_st)*255) + (i << 8); break; } } char_table[0]|=1+257; // Avoid problems with 0 for (i=0 ; i < 256 ; i++) { uint tmp=(uint) (rnd(&rand_st)*255); swap(uint,char_table[i],char_table[tmp]); } /* lower characters should be mapped to upper */ for (i= 'a' ; i <= 'z' ; i++) { /* This loop is coded with extra variables to avoid a bug in gcc 2.96 */ uchar tmp= (uchar) (i - 'a'); // Assume ascii tmp+='A'; char_table[i]=char_table[tmp]; } } /* Fill array primes with primes between start and 'max_allowed_array' */ static void make_prime_array(uint start) { uint i,j,*to; uint max_index=(uint) sqrt((double) max_allowed_array); bzero((char*) primes,sizeof(primes[0])*max_allowed_array); i=2; while (i < max_index) { for (j=i+i ; j <= max_allowed_array ; j+=i) primes[j]=1; while (primes[++i]) ; } to=primes; for (i=start ; i <= max_allowed_array ; i++) if (!primes[i]) *to++=i; *to=0; // end marker } #define USE_char_table static ulong tab_index_function(const char *s,uint add, uint type) { register ulong nr=start_value+char_table[(uchar) *s]; // Nice value ulong pos=3; uint tmp_length=unique_length[(uchar) *s]-1; while (*++s && tmp_length-- > 0) { switch (type) { case 0: nr= (nr ^ (char_table[(uchar) *s] + (nr << add))); break; case 1: nr= (nr + (char_table[(uchar) *s] + (nr << add))); break; case 2: nr= (nr ^ (char_table[(uchar) *s] ^ (nr << add))); break; case 3: nr= (char_table[(uchar) *s] ^ (nr << add)); break; case 4: nr+= nr+nr+((nr & 63)+pos)*((ulong) char_table[(uchar) *s]); pos+=add; break; } } return nr & INT_MAX24; } static int search(bool write_warning) { uint size_symbols = sizeof(symbols)/sizeof(SYMBOL); uint size_functions = sizeof(sql_functions)/sizeof(SYMBOL); uint size=size_symbols + size_functions; uint i=0,found,*prime,type; int igra[max_allowed_array],test_count=INT_MAX; uint possible_plus[how_much_for_plus*type_count+type_count]; how_long_symbols = sizeof(symbols)/sizeof(SYMBOL); bzero((char*) possible_plus,sizeof(possible_plus)); found=0; /* Check first which function_plus are possible */ for (type=0 ; type < type_count ; type ++) { for (function_plus = 1; function_plus <= how_much_for_plus; function_plus++) { bzero((char*) bits,sizeof(bits)); for (i=0; i < size; i++) { ulong order= tab_index_function ((i < how_long_symbols) ? symbols[i].name : sql_functions[i-how_long_symbols].name, function_plus, type); hash_results[type][function_plus][i]=order; uint pos=order/8; uint bit=order & 7; if (bits[pos] & (1 << bit)) break; bits[pos]|=1 << bit; } if (i == size) { possible_plus[found++]=function_plus; } } possible_plus[found++]=0; // End marker } if (found == type_count) { if (write_warning) fprintf(stderr,"\ The hash function didn't return a unique value for any parameter\n\ You have to change gen_lex_code.cc, function 'tab_index_function' to\n\ generate unique values for some parameter. When you have succeeded in this,\n\ you have to change 'main' to print out the new function\n"); return(1); } if (opt_verbose > 1) fprintf (stderr,"Info: Possible add values: %d\n",found-type_count); for (prime=primes; (function_mod=*prime) ; prime++) { uint *plus_ptr=possible_plus; for (type=0 ; type < type_count ; type++ ) { while ((function_plus= *plus_ptr++)) { ulong *order_pos= &hash_results[type][function_plus][0]; if (test_count++ == INT_MAX) { test_count=1; bzero((char*) igra,sizeof(igra)); } for (i=0; i<size ;i++) { ulong order; order = *order_pos++ % function_mod; if (igra[order] == test_count) break; igra[order] = test_count; } if (i == size) { *prime=0; // Mark this used function_type=type; return 0; // Found ok value } } } } function_mod=max_allowed_array; if (write_warning) fprintf (stderr,"Fatal error when generating hash for symbols\n\ Didn't find suitable values for perfect hashing:\n\ You have to edit gen_lex_hash.cc to generate a new hashing function.\n\ You can try running gen_lex_hash with --search to find a suitable value\n\ Symbol array size = %d\n",function_mod); return -1; } void print_arrays() { uint size_symbols = sizeof(symbols)/sizeof(SYMBOL); uint size_functions = sizeof(sql_functions)/sizeof(SYMBOL); uint size=size_symbols + size_functions; uint i; fprintf(stderr,"Symbols: %d Functions: %d; Total: %d\nShifts per char: %d, Array size: %d\n", size_symbols,size_functions,size_symbols+size_functions, function_plus,function_mod); int *prva= (int*) my_alloca(sizeof(int)*function_mod); for (i=0 ; i <= function_mod; i++) prva[i]= max_symbol; for (i=0;i<size;i++) { ulong order = tab_index_function ((i < how_long_symbols) ? symbols[i].name : sql_functions[i - how_long_symbols].name,function_plus,function_type); order %= function_mod; prva [order] = i; } #ifdef USE_char_table printf("static uint16 char_table[] = {\n"); for (i=0; i < 255 ;i++) // < 255 is correct { printf("%u,",char_table[i]); if (((i+1) & 15) == 0) puts(""); } printf("%d\n};\n\n\n",char_table[i]); #endif printf("static uchar unique_length[] = {\n"); for (i=0; i < 255 ;i++) // < 255 is correct { printf("%u,",unique_length[i]); if (((i+1) & 15) == 0) puts(""); } printf("%d\n};\n\n\n",unique_length[i]); printf("static uint16 my_function_table[] = {\n"); for (i=0; i < function_mod-1 ;i++) { printf("%d,",prva[i]); if (((i+1) % 12) == 0) puts(""); } printf("%d\n};\n\n\n",prva[i]); my_afree((gptr) prva); } static struct option long_options[] = { {"count", required_argument, 0, 'c'}, {"search", no_argument, 0, 'S'}, {"verbose", no_argument, 0, 'v'}, {"version", no_argument, 0, 'V'}, {"rnd1", required_argument, 0, 'r'}, {"rnd2", required_argument, 0, 'R'}, {"type", required_argument, 0, 't'}, {0, 0, 0, 0} }; static void usage(int version) { printf("%s Ver 3.2 Distrib %s, for %s (%s)\n", my_progname, MYSQL_SERVER_VERSION, SYSTEM_TYPE, MACHINE_TYPE); if (version) return; puts("Copyright (C) 2000 MySQL AB & MySQL Finland AB, by Sinisa and Monty"); puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,\nand you are welcome to modify and redistribute it under the GPL license\n"); puts("This program generates a perfect hashing function for the sql_lex.cc"); printf("Usage: %s [OPTIONS]\n", my_progname); printf("\n\ -c, --count=# Try count times to find a optimal hash table\n\ -r, --rnd1=# Set 1 part of rnd value for hash generator\n\ -R, --rnd2=# Set 2 part of rnd value for hash generator\n\ -t, --type=# Set type of char table to generate\n\ -S, --search Search after good rnd1 and rnd2 values\n\ -v, --verbose Write some information while the program executes\n\ -V, --version Output version information and exit\n"); } static uint best_type; static ulong best_t1,best_t2, best_start_value; static int get_options(int argc, char **argv) { int c,option_index=0; while ((c=getopt_long(argc,argv,"?SvVc:r:R:t:", long_options, &option_index)) != EOF) { switch(c) { case 'c': opt_count=atol(optarg); break; case 'r': best_t1=atol(optarg); break; case 'R': best_t2=atol(optarg); break; case 't': best_type=atoi(optarg); break; case 'S': opt_search=1; break; case 'v': opt_verbose++; break; case 'V': usage(1); exit(0); case 'I': case '?': usage(0); exit(0); default: fprintf(stderr,"illegal option: -%c\n",opterr); usage(0); exit(1); } } argc-=optind; argv+=optind; if (argc >= 1) { usage(0); exit(1); } return(0); } static uint max_prefix(const char *name) { uint i; uint max_length=1; for (i=0 ; i < sizeof(symbols)/sizeof(SYMBOL) ; i++) { const char *str=symbols[i].name; if (str != name) { const char *str2=name; uint length; while (*str && *str == *str2) { str++; str2++; } length=(uint) (str2 - name)+1; if (length > max_length) max_length=length; } } for (i=0 ; i < sizeof(sql_functions)/sizeof(SYMBOL) ; i++) { const char *str=sql_functions[i].name; if (str != name) { const char *str2=name; uint length; while (*str && *str == *str2) { str++; str2++; } length=(uint) (str2 - name)+1; if (length > max_length) max_length=length; } } return max_length; } static void make_max_length_table(void) { uint i; for (i=0 ; i < sizeof(symbols)/sizeof(SYMBOL) ; i++) { uint length=max_prefix(symbols[i].name); if (length > unique_length[(uchar) symbols[i].name[0]]) { unique_length[(uchar) symbols[i].name[0]]=length; unique_length[(uchar) tolower(symbols[i].name[0])]=length; } } for (i=0 ; i < sizeof(sql_functions)/sizeof(SYMBOL) ; i++) { uint length=max_prefix(sql_functions[i].name); if (length > unique_length[(uchar) sql_functions[i].name[0]]) { unique_length[(uchar) sql_functions[i].name[0]]=length; unique_length[(uchar) tolower(sql_functions[i].name[0])]=length; } } } int main(int argc,char **argv) { struct rand_struct rand_st; static uint best_mod,best_add,best_functype; int error; MY_INIT(argv[0]); start_value=1109118L; best_t1=6657025L; best_t2=6114496L; best_type=1; /* mode=4903 add=3 type: 0 */ if (get_options(argc,(char **) argv)) exit(1); make_max_length_table(); make_char_table(best_t1,best_t2,best_type); make_prime_array(sizeof(symbols)/sizeof(SYMBOL) + sizeof(sql_functions)/sizeof(SYMBOL)); if ((error=search(1)) > 0 || error && !opt_search) exit(1); // This should work best_mod=function_mod; best_add=function_plus; best_functype=function_type; if (opt_search) { time_t start_time=time((time_t*) 0); randominit(&rand_st,start_time,start_time/2); // Some random values printf("start_value=%ldL; best_t1=%ldL; best_t2=%ldL; best_type=%d; /* mode=%d add=%d type: %d */\n", start_value, best_t1,best_t2,best_type,best_mod,best_add, best_functype); best_start_value=start_value; for (uint i=1 ; i <= opt_count ; i++) { if (i % 10 == 0) { putchar('.'); fflush(stdout); } ulong t1=(ulong) (rnd(&rand_st)*INT_MAX24); ulong t2=(ulong) (rnd(&rand_st)*INT_MAX24); uint type=(int) (rnd(&rand_st)*char_table_count); start_value=(ulong) (rnd(&rand_st)*INT_MAX24); make_char_table(t1,t2,type); if (!search(0)) { best_mod=function_mod; best_add=function_plus; best_functype=function_type; best_t1=t1; best_t2=t2; best_type=type; best_start_value=start_value; printf("\nstart_value=%ldL; best_t1=%ldL; best_t2=%ldL; best_type=%d; /* mode=%d add=%d type: %d */\n", best_start_value,best_t1,best_t2,best_type,best_mod,best_add, best_functype); } if (opt_verbose && (i % 20000) == 0) printf("\nstart_value=%ldL; best_t1=%ldL; best_t2=%ldL; best_type=%d; /* mode=%d add=%d type: %d */\n", best_start_value,best_t1,best_t2,best_type,best_mod,best_add, best_functype); } } function_mod=best_mod; function_plus=best_add; make_char_table(best_t1,best_t2,best_type); printf("/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB\n\ This program is free software; you can redistribute it and/or modify\n\ it under the terms of the GNU General Public License as published by\n\ the Free Software Foundation; either version 2 of the License, or\n\ (at your option) any later version.\n\n\ This program is distributed in the hope that it will be useful,\n\ but WITHOUT ANY WARRANTY; without even the implied warranty of\n\ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\ GNU General Public License for more details.\n\n\ You should have received a copy of the GNU General Public License\n\ along with this program; if not, write to the Free Software\n\ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */\n\n"); printf("/* This code is generated by gen_lex_hash.cc that seeks for a perfect\nhash function */\n\n"); printf("#include \"lex.h\"\n\n"); print_arrays(); printf("/* start_value=%ldL; best_t1=%ldL; best_t2=%ldL; best_type=%d; */ /* mode=%d add=%d type: %d */\n\n", best_start_value, best_t1, best_t2, best_type, best_mod, best_add, best_functype); printf("inline SYMBOL *get_hash_symbol(const char *s,unsigned int length,bool function)\n\ {\n\ ulong idx = %lu+char_table[(uchar) *s];\n\ SYMBOL *sim;\n\ const char *start=s;\n\ int i=unique_length[(uchar) *s++];\n\ if (i > (int) length) i=(int) length;\n\ while (--i > 0)\n\ idx= (idx ^ (char_table[(uchar) *s++] + (idx << %d)));\n\ idx=my_function_table[(idx & %d) %% %d];\n\ if (idx >= %d)\n\ {\n\ if (!function || idx >= %d) return (SYMBOL*) 0;\n\ sim=sql_functions + (idx - %d);\n\ }\n\ else\n\ sim=symbols + idx;\n\ if ((length != sim->length) || lex_casecmp(start,sim->name,length))\n\ return (SYMBOL *)0;\n\ return sim;\n\ }\n",(ulong) start_value,(int) function_plus,(int) how_much_and,function_mod,how_long_symbols,max_symbol,how_long_symbols); exit(0); return 0; }