gen_lex_hash.cc
上传用户:tsgydb
上传日期:2007-04-14
资源大小:10674k
文件大小:16k
- /* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
- #define NO_YACC_SYMBOLS
- #include <global.h>
- #include <my_sys.h>
- #include <m_string.h>
- #ifndef __GNU_LIBRARY__
- #define __GNU_LIBRARY__ // Skipp warnings in getopt.h
- #endif
- #include <getopt.h>
- #include "mysql_version.h"
- #include "lex.h"
- bool opt_search=0,opt_verbose=0;
- ulong opt_count=100000;
- #define max_allowed_array 8000 // Don't generate bigger arrays than this
- #define max_symbol 32767 // Use this for 'not found'
- #define how_much_for_plus 8 // 2-8
- #define type_count 1 // 1-5
- #define char_table_count 5
- #define total_symbols (sizeof(symbols)/sizeof(SYMBOL) +
- sizeof(sql_functions)/sizeof(SYMBOL))
- #define how_much_and INT_MAX24
- /*
- The following only have to work with characters in the set
- used by SQL commands
- */
- #undef tolower
- #define tolower(a) ((a) >= 'A' && (a) <= 'Z') ? ((a)- 'A' + 'a') : (a)
- static uint how_long_symbols,function_plus,function_mod,function_type;
- static uint char_table[256];
- static uchar unique_length[256];
- static uchar bits[how_much_and/8+1];
- static uint primes[max_allowed_array+1];
- static ulong hash_results[type_count][how_much_for_plus+1][total_symbols];
- static ulong start_value=0;
- struct rand_struct {
- unsigned long seed1,seed2,max_value;
- double max_value_dbl;
- };
- void randominit(struct rand_struct *rand_st,ulong seed1, ulong seed2)
- { /* For mysql 3.21.# */
- rand_st->max_value= 0x3FFFFFFFL;
- rand_st->max_value_dbl=(double) rand_st->max_value;
- rand_st->seed1=seed1%rand_st->max_value ;
- rand_st->seed2=seed2%rand_st->max_value;
- }
- double rnd(struct rand_struct *rand_st)
- {
- rand_st->seed1=(rand_st->seed1*3+rand_st->seed2) % rand_st->max_value;
- rand_st->seed2=(rand_st->seed1+rand_st->seed2+33) % rand_st->max_value;
- return (((double) rand_st->seed1)/rand_st->max_value_dbl);
- }
- static void make_char_table(ulong t1,ulong t2,int type)
- {
- uint i;
- struct rand_struct rand_st;
- randominit(&rand_st,t1,t2);
- for (i=0 ; i < 256 ; i++)
- {
- switch (type) {
- case 0: char_table[i]= i + (i << 8); break;
- case 1: char_table[i]= i + ((i ^255 ) << 8); break;
- case 2: char_table[i]= i; break;
- case 3: char_table[i]= i + ((uint) (rnd(&rand_st)*255) << 8); break;
- case 4: char_table[i]= (uint) (rnd(&rand_st)*255) + (i << 8); break;
- }
- }
- char_table[0]|=1+257; // Avoid problems with 0
- for (i=0 ; i < 256 ; i++)
- {
- uint tmp=(uint) (rnd(&rand_st)*255);
- swap(uint,char_table[i],char_table[tmp]);
- }
- /* lower characters should be mapped to upper */
- for (i= 'a' ; i <= 'z' ; i++)
- {
- /* This loop is coded with extra variables to avoid a bug in gcc 2.96 */
- uchar tmp= (uchar) (i - 'a'); // Assume ascii
- tmp+='A';
- char_table[i]=char_table[tmp];
- }
- }
- /* Fill array primes with primes between start and 'max_allowed_array' */
- static void make_prime_array(uint start)
- {
- uint i,j,*to;
- uint max_index=(uint) sqrt((double) max_allowed_array);
- bzero((char*) primes,sizeof(primes[0])*max_allowed_array);
- i=2;
- while (i < max_index)
- {
- for (j=i+i ; j <= max_allowed_array ; j+=i)
- primes[j]=1;
- while (primes[++i]) ;
- }
- to=primes;
- for (i=start ; i <= max_allowed_array ; i++)
- if (!primes[i])
- *to++=i;
- *to=0; // end marker
- }
- #define USE_char_table
- static ulong tab_index_function(const char *s,uint add, uint type)
- {
- register ulong nr=start_value+char_table[(uchar) *s]; // Nice value
- ulong pos=3;
- uint tmp_length=unique_length[(uchar) *s]-1;
- while (*++s && tmp_length-- > 0)
- {
- switch (type) {
- case 0:
- nr= (nr ^ (char_table[(uchar) *s] + (nr << add)));
- break;
- case 1:
- nr= (nr + (char_table[(uchar) *s] + (nr << add)));
- break;
- case 2:
- nr= (nr ^ (char_table[(uchar) *s] ^ (nr << add)));
- break;
- case 3:
- nr= (char_table[(uchar) *s] ^ (nr << add));
- break;
- case 4:
- nr+= nr+nr+((nr & 63)+pos)*((ulong) char_table[(uchar) *s]);
- pos+=add;
- break;
- }
- }
- return nr & INT_MAX24;
- }
- static int search(bool write_warning)
- {
- uint size_symbols = sizeof(symbols)/sizeof(SYMBOL);
- uint size_functions = sizeof(sql_functions)/sizeof(SYMBOL);
- uint size=size_symbols + size_functions;
- uint i=0,found,*prime,type;
- int igra[max_allowed_array],test_count=INT_MAX;
- uint possible_plus[how_much_for_plus*type_count+type_count];
- how_long_symbols = sizeof(symbols)/sizeof(SYMBOL);
- bzero((char*) possible_plus,sizeof(possible_plus));
- found=0;
- /* Check first which function_plus are possible */
- for (type=0 ; type < type_count ; type ++)
- {
- for (function_plus = 1;
- function_plus <= how_much_for_plus;
- function_plus++)
- {
- bzero((char*) bits,sizeof(bits));
- for (i=0; i < size; i++)
- {
- ulong order= tab_index_function ((i < how_long_symbols) ?
- symbols[i].name :
- sql_functions[i-how_long_symbols].name,
- function_plus, type);
- hash_results[type][function_plus][i]=order;
- uint pos=order/8;
- uint bit=order & 7;
- if (bits[pos] & (1 << bit))
- break;
- bits[pos]|=1 << bit;
- }
- if (i == size)
- {
- possible_plus[found++]=function_plus;
- }
- }
- possible_plus[found++]=0; // End marker
- }
- if (found == type_count)
- {
- if (write_warning)
- fprintf(stderr,"
- The hash function didn't return a unique value for any parametern
- You have to change gen_lex_code.cc, function 'tab_index_function' ton
- generate unique values for some parameter. When you have succeeded in this,n
- you have to change 'main' to print out the new functionn");
- return(1);
- }
- if (opt_verbose)
- fprintf (stderr,"Info: Possible add values: %dn",found-type_count);
- for (prime=primes; (function_mod=*prime) ; prime++)
- {
- uint *plus_ptr=possible_plus;
- for (type=0 ; type < type_count ; type++ )
- {
- while ((function_plus= *plus_ptr++))
- {
- ulong *order_pos= &hash_results[type][function_plus][0];
- if (test_count++ == INT_MAX)
- {
- test_count=1;
- bzero((char*) igra,sizeof(igra));
- }
- for (i=0; i<size ;i++)
- {
- ulong order;
- order = *order_pos++ % function_mod;
- if (igra[order] == test_count)
- break;
- igra[order] = test_count;
- }
- if (i == size)
- {
- *prime=0; // Mark this used
- function_type=type;
- return 0; // Found ok value
- }
- }
- }
- }
- function_mod=max_allowed_array;
- if (write_warning)
- fprintf (stderr,"Fatal error when generating hash for symbolsn
- Didn't find suitable values for perfect hashing:n
- You have to edit gen_lex_hash.cc to generate a new hashing function.n
- You can try running gen_lex_hash with --search to find a suitable valuen
- Symbol array size = %dn",function_mod);
- return -1;
- }
- void print_arrays()
- {
- uint size_symbols = sizeof(symbols)/sizeof(SYMBOL);
- uint size_functions = sizeof(sql_functions)/sizeof(SYMBOL);
- uint size=size_symbols + size_functions;
- uint i;
- fprintf(stderr,"Symbols: %d Functions: %d; Total: %dnShifts per char: %d, Array size: %dn",
- size_symbols,size_functions,size_symbols+size_functions,
- function_plus,function_mod);
- int *prva= (int*) my_alloca(sizeof(int)*function_mod);
- for (i=0 ; i <= function_mod; i++)
- prva[i]= max_symbol;
- for (i=0;i<size;i++)
- {
- ulong order = tab_index_function ((i < how_long_symbols) ? symbols[i].name : sql_functions[i - how_long_symbols].name,function_plus,function_type);
- order %= function_mod;
- prva [order] = i;
- }
- #ifdef USE_char_table
- printf("static uint16 char_table[] = {n");
- for (i=0; i < 255 ;i++) // < 255 is correct
- {
- printf("%u,",char_table[i]);
- if (((i+1) & 15) == 0)
- puts("");
- }
- printf("%dn};nnn",char_table[i]);
- #endif
- printf("static uchar unique_length[] = {n");
- for (i=0; i < 255 ;i++) // < 255 is correct
- {
- printf("%u,",unique_length[i]);
- if (((i+1) & 15) == 0)
- puts("");
- }
- printf("%dn};nnn",unique_length[i]);
- printf("static uint16 my_function_table[] = {n");
- for (i=0; i < function_mod-1 ;i++)
- {
- printf("%d,",prva[i]);
- if (((i+1) % 12) == 0)
- puts("");
- }
- printf("%dn};nnn",prva[i]);
- my_afree((gptr) prva);
- }
- static struct option long_options[] =
- {
- {"count", required_argument, 0, 'c'},
- {"search", no_argument, 0, 'S'},
- {"verbose", no_argument, 0, 'v'},
- {"version", no_argument, 0, 'V'},
- {"rnd1", required_argument, 0, 'r'},
- {"rnd2", required_argument, 0, 'R'},
- {"type", required_argument, 0, 't'},
- {0, 0, 0, 0}
- };
- static void usage(int version)
- {
- printf("%s Ver 3.2 Distrib %s, for %s (%s)n",
- my_progname, MYSQL_SERVER_VERSION, SYSTEM_TYPE, MACHINE_TYPE);
- if (version)
- return;
- puts("Copyright (C) 2000 MySQL AB & MySQL Finland AB, by Sinisa and Monty");
- puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,nand you are welcome to modify and redistribute it under the GPL licensen");
- puts("This program generates a perfect hashing function for the sql_lex.cc");
- printf("Usage: %s [OPTIONS]n", my_progname);
- printf("n
- -c, --count=# Try count times to find a optimal hash tablen
- -r, --rnd1=# Set 1 part of rnd value for hash generatorn
- -R, --rnd2=# Set 2 part of rnd value for hash generatorn
- -t, --type=# Set type of char table to generaten
- -S, --search Search after good rnd1 and rnd2 valuesn
- -v, --verbose Write some information while the program executesn
- -V, --version Output version information and exitn");
- }
- static uint best_type;
- static ulong best_t1,best_t2, best_start_value;
- static int get_options(int argc, char **argv)
- {
- int c,option_index=0;
- while ((c=getopt_long(argc,argv,"?SvVc:r:R:t:",
- long_options, &option_index)) != EOF)
- {
- switch(c) {
- case 'c':
- opt_count=atol(optarg);
- break;
- case 'r':
- best_t1=atol(optarg);
- break;
- case 'R':
- best_t2=atol(optarg);
- break;
- case 't':
- best_type=atoi(optarg);
- break;
- case 'S':
- opt_search=1;
- break;
- case 'v':
- opt_verbose=1;
- break;
- case 'V': usage(1); exit(0);
- case 'I':
- case '?':
- usage(0);
- exit(0);
- default:
- fprintf(stderr,"illegal option: -%cn",opterr);
- usage(0);
- exit(1);
- }
- }
- argc-=optind;
- argv+=optind;
- if (argc >= 1)
- {
- usage(0);
- exit(1);
- }
- return(0);
- }
- static uint max_prefix(const char *name)
- {
- uint i;
- uint max_length=1;
- for (i=0 ; i < sizeof(symbols)/sizeof(SYMBOL) ; i++)
- {
- const char *str=symbols[i].name;
- if (str != name)
- {
- const char *str2=name;
- uint length;
- while (*str && *str == *str2)
- {
- str++;
- str2++;
- }
- length=(uint) (str2 - name)+1;
- if (length > max_length)
- max_length=length;
- }
- }
- for (i=0 ; i < sizeof(sql_functions)/sizeof(SYMBOL) ; i++)
- {
- const char *str=sql_functions[i].name;
- if (str != name)
- {
- const char *str2=name;
- uint length;
- while (*str && *str == *str2)
- {
- str++;
- str2++;
- }
- length=(uint) (str2 - name)+1;
- if (length > max_length)
- max_length=length;
- }
- }
- return max_length;
- }
- static void make_max_length_table(void)
- {
- uint i;
- for (i=0 ; i < sizeof(symbols)/sizeof(SYMBOL) ; i++)
- {
- uint length=max_prefix(symbols[i].name);
- if (length > unique_length[(uchar) symbols[i].name[0]])
- {
- unique_length[(uchar) symbols[i].name[0]]=length;
- unique_length[(uchar) tolower(symbols[i].name[0])]=length;
- }
- }
- for (i=0 ; i < sizeof(sql_functions)/sizeof(SYMBOL) ; i++)
- {
- uint length=max_prefix(sql_functions[i].name);
- if (length > unique_length[(uchar) sql_functions[i].name[0]])
- {
- unique_length[(uchar) sql_functions[i].name[0]]=length;
- unique_length[(uchar) tolower(sql_functions[i].name[0])]=length;
- }
- }
- }
- int main(int argc,char **argv)
- {
- struct rand_struct rand_st;
- static uint best_mod,best_add,best_functype;
- int error;
- MY_INIT(argv[0]);
- start_value=5315771L; best_t1=6916833L; best_t2=3813748L; best_type=3; /* mode=5839 add=5 type: 0 */
- if (get_options(argc,(char **) argv))
- exit(1);
- make_max_length_table();
- make_char_table(best_t1,best_t2,best_type);
- make_prime_array(sizeof(symbols)/sizeof(SYMBOL) +
- sizeof(sql_functions)/sizeof(SYMBOL));
- if ((error=search(1)) > 0 || error && !opt_search)
- exit(1); // This should work
- best_mod=function_mod; best_add=function_plus; best_functype=function_type;
- if (opt_search)
- {
- time_t start_time=time((time_t*) 0);
- randominit(&rand_st,start_time,start_time/2); // Some random values
- printf("start_value=%ldL; best_t1=%ldL; best_t2=%ldL; best_type=%d; /* mode=%d add=%d type: %d */n",
- start_value, best_t1,best_t2,best_type,best_mod,best_add,
- best_functype);
- for (uint i=1 ; i <= opt_count ; i++)
- {
- if (i % 10 == 0)
- {
- putchar('.');
- fflush(stdout);
- }
- ulong t1=(ulong) (rnd(&rand_st)*INT_MAX24);
- ulong t2=(ulong) (rnd(&rand_st)*INT_MAX24);
- uint type=(int) (rnd(&rand_st)*char_table_count);
- start_value=(ulong) (rnd(&rand_st)*INT_MAX24);
- make_char_table(t1,t2,type);
- if (!search(0))
- {
- best_mod=function_mod; best_add=function_plus;
- best_functype=function_type;
- best_t1=t1; best_t2=t2; best_type=type;
- best_start_value=start_value;
- printf("nstart_value=%ldL; best_t1=%ldL; best_t2=%ldL; best_type=%d; /* mode=%d add=%d type: %d */n",
- best_start_value,best_t1,best_t2,best_type,best_mod,best_add,
- best_functype);
- }
- }
- }
- function_mod=best_mod; function_plus=best_add;
- make_char_table(best_t1,best_t2,best_type);
- printf("/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult ABn
- This program is free software; you can redistribute it and/or modifyn
- it under the terms of the GNU General Public License as published byn
- the Free Software Foundation; either version 2 of the License, orn
- (at your option) any later version.nn
- This program is distributed in the hope that it will be useful,n
- but WITHOUT ANY WARRANTY; without even the implied warranty ofn
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See then
- GNU General Public License for more details.nn
- You should have received a copy of the GNU General Public Licensen
- along with this program; if not, write to the Free Softwaren
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */nn");
- printf("/* This code is generated by gen_lex_hash.cc that seeks for a perfectnhash function */nn");
- printf("#include "lex.h"nn");
- print_arrays();
- printf("/* start_value=%ldL; best_t1=%ldL; best_t2=%ldL; best_type=%d; */ /* mode=%d add=%d type: %d */nn",
- best_start_value, best_t1, best_t2, best_type,
- best_mod, best_add, best_functype);
- printf("inline SYMBOL *get_hash_symbol(const char *s,unsigned int length,bool function)n
- {n
- ulong idx = %lu+char_table[(uchar) *s];n
- SYMBOL *sim;n
- const char *start=s;n
- int i=unique_length[(uchar) *s++];n
- if (i > (int) length) i=(int) length;n
- while (--i > 0)n
- idx= (idx ^ (char_table[(uchar) *s++] + (idx << %d)));n
- idx=my_function_table[(idx & %d) %% %d];n
- if (idx >= %d)n
- {n
- if (!function || idx >= %d) return (SYMBOL*) 0;n
- sim=sql_functions + (idx - %d);n
- }n
- elsen
- sim=symbols + idx;n
- if ((length != sim->length) || lex_casecmp(start,sim->name,length))n
- return (SYMBOL *)0;n
- return sim;n
- }n",(ulong) start_value,(int) function_plus,(int) how_much_and,function_mod,how_long_symbols,max_symbol,how_long_symbols);
- exit(0);
- return 0;
- }