Commit 96212cc3 authored by monty@mashka.mysql.fi

Faster parsing of identifiers

Compatibility syntax: SERIAL, [PRIMARY] KEY and VALUE
parent b201dfec
...@@ -76,7 +76,7 @@ inline int lex_casecmp(const char *s, const char *t, uint len) ...@@ -76,7 +76,7 @@ inline int lex_casecmp(const char *s, const char *t, uint len)
#include "lex_hash.h" #include "lex_hash.h"
static uchar state_map[256]; static uchar state_map[256], ident_map[256];
void lex_init(void) void lex_init(void)
...@@ -91,7 +91,7 @@ void lex_init(void) ...@@ -91,7 +91,7 @@ void lex_init(void)
VOID(pthread_key_create(&THR_LEX,NULL)); VOID(pthread_key_create(&THR_LEX,NULL));
/* Fill state_map with states to get a faster parser */ /* Fill state_map with states to get a faster parser */
for (i=0; i < 256 ; i++) for (i=0; i < sizeof(state_map) ; i++)
{ {
if (my_isalpha(system_charset_info,i)) if (my_isalpha(system_charset_info,i))
state_map[i]=(uchar) STATE_IDENT; state_map[i]=(uchar) STATE_IDENT;
...@@ -126,6 +126,20 @@ void lex_init(void) ...@@ -126,6 +126,20 @@ void lex_init(void)
{ {
state_map[(uchar) '"'] = STATE_USER_VARIABLE_DELIMITER; state_map[(uchar) '"'] = STATE_USER_VARIABLE_DELIMITER;
} }
/*
  Create a second map to make it faster to find identifiers.
  ident_map[c] is non-zero when state_map[c] is STATE_IDENT or
  STATE_NUMBER_IDENT, so callers can test one table entry instead of
  comparing against two states.

  This loop must run before the hex/binary overrides below: those
  replace STATE_IDENT in state_map for 'x', 'X', 'b' and 'B', and the
  letters must still be flagged as identifier characters in ident_map.
*/
for (i=0; i < sizeof(ident_map) ; i++)
{
  ident_map[i]= (uchar) (state_map[i] == STATE_IDENT ||
                         state_map[i] == STATE_NUMBER_IDENT);
}
/*
  Special handling of hex and binary strings.
  Both the lower and upper case prefix letter get the special state.
*/
state_map[(uchar)'x']= state_map[(uchar)'X']= (uchar) STATE_IDENT_OR_HEX;
state_map[(uchar)'b']= state_map[(uchar)'B']= (uchar) STATE_IDENT_OR_BIN;
DBUG_VOID_RETURN; DBUG_VOID_RETURN;
} }
...@@ -459,7 +473,7 @@ int yylex(void *arg) ...@@ -459,7 +473,7 @@ int yylex(void *arg)
} }
case STATE_CHAR: // Unknown or single char token case STATE_CHAR: // Unknown or single char token
case STATE_SKIP: // This should not happen case STATE_SKIP: // This should not happen
yylval->lex_str.str=(char*) (lex->ptr=lex->tok_start);// Set to first char yylval->lex_str.str=(char*) (lex->ptr=lex->tok_start);// Set to first chr
yylval->lex_str.length=1; yylval->lex_str.length=1;
c=yyGet(); c=yyGet();
if (c != ')') if (c != ')')
...@@ -468,12 +482,15 @@ int yylex(void *arg) ...@@ -468,12 +482,15 @@ int yylex(void *arg)
lex->tok_start=lex->ptr; // Let tok_start point at next item lex->tok_start=lex->ptr; // Let tok_start point at next item
return((int) c); return((int) c);
case STATE_IDENT: // Incomplete keyword or ident case STATE_IDENT_OR_HEX:
if ((c == 'x' || c == 'X') && yyPeek() == '\'') if (yyPeek() == '\'')
{ // Found x'hex-number' { // Found x'hex-number'
state=STATE_HEX_NUMBER; state= STATE_HEX_NUMBER;
break; break;
} }
/* Fall through */
case STATE_IDENT_OR_BIN: // TODO: Add binary string handling
case STATE_IDENT:
#if defined(USE_MB) && defined(USE_MB_IDENT) #if defined(USE_MB) && defined(USE_MB_IDENT)
if (use_mb(system_charset_info)) if (use_mb(system_charset_info))
{ {
...@@ -488,8 +505,7 @@ int yylex(void *arg) ...@@ -488,8 +505,7 @@ int yylex(void *arg)
} }
lex->ptr += l - 1; lex->ptr += l - 1;
} }
while (state_map[c=yyGet()] == STATE_IDENT || while (ident_map[c=yyGet()])
state_map[c] == STATE_NUMBER_IDENT)
{ {
if (my_ismbhead(system_charset_info, c)) if (my_ismbhead(system_charset_info, c))
{ {
...@@ -504,15 +520,13 @@ int yylex(void *arg) ...@@ -504,15 +520,13 @@ int yylex(void *arg)
} }
else else
#endif #endif
while (state_map[c=yyGet()] == STATE_IDENT || while (ident_map[c=yyGet()]) ;
state_map[c] == STATE_NUMBER_IDENT) ;
length= (uint) (lex->ptr - lex->tok_start)-1; length= (uint) (lex->ptr - lex->tok_start)-1;
if (lex->ignore_space) if (lex->ignore_space)
{ {
for (; state_map[c] == STATE_SKIP ; c= yyGet()); for (; state_map[c] == STATE_SKIP ; c= yyGet());
} }
if (c == '.' && (state_map[yyPeek()] == STATE_IDENT || if (c == '.' && ident_map[yyPeek()])
state_map[yyPeek()] == STATE_NUMBER_IDENT))
lex->next_state=STATE_IDENT_SEP; lex->next_state=STATE_IDENT_SEP;
else else
{ // '(' must follow directly if function { // '(' must follow directly if function
...@@ -550,7 +564,7 @@ int yylex(void *arg) ...@@ -550,7 +564,7 @@ int yylex(void *arg)
case STATE_NUMBER_IDENT: // number or ident which num-start case STATE_NUMBER_IDENT: // number or ident which num-start
while (my_isdigit(system_charset_info,(c = yyGet()))) ; while (my_isdigit(system_charset_info,(c = yyGet()))) ;
if (state_map[c] != STATE_IDENT) if (!ident_map[c])
{ // Can't be identifier { // Can't be identifier
state=STATE_INT_OR_REAL; state=STATE_INT_OR_REAL;
break; break;
...@@ -575,7 +589,7 @@ int yylex(void *arg) ...@@ -575,7 +589,7 @@ int yylex(void *arg)
lex->tok_start[0] == '0' ) lex->tok_start[0] == '0' )
{ // Varbinary { // Varbinary
while (my_isxdigit(system_charset_info,(c = yyGet()))) ; while (my_isxdigit(system_charset_info,(c = yyGet()))) ;
if ((lex->ptr - lex->tok_start) >= 4 && state_map[c] != STATE_IDENT) if ((lex->ptr - lex->tok_start) >= 4 && !ident_map[c])
{ {
yylval->lex_str=get_token(lex,yyLength()); yylval->lex_str=get_token(lex,yyLength());
yylval->lex_str.str+=2; // Skip 0x yylval->lex_str.str+=2; // Skip 0x
...@@ -602,8 +616,7 @@ int yylex(void *arg) ...@@ -602,8 +616,7 @@ int yylex(void *arg)
} }
lex->ptr += l - 1; lex->ptr += l - 1;
} }
while (state_map[c=yyGet()] == STATE_IDENT || while (ident_map[c=yyGet()])
state_map[c] == STATE_NUMBER_IDENT)
{ {
if (my_ismbhead(system_charset_info, c)) if (my_ismbhead(system_charset_info, c))
{ {
...@@ -618,11 +631,9 @@ int yylex(void *arg) ...@@ -618,11 +631,9 @@ int yylex(void *arg)
} }
else else
#endif #endif
while (state_map[c = yyGet()] == STATE_IDENT || while (ident_map[c = yyGet()]) ;
state_map[c] == STATE_NUMBER_IDENT) ;
if (c == '.' && (state_map[yyPeek()] == STATE_IDENT || if (c == '.' && ident_map[yyPeek()])
state_map[yyPeek()] == STATE_NUMBER_IDENT))
lex->next_state=STATE_IDENT_SEP;// Next is '.' lex->next_state=STATE_IDENT_SEP;// Next is '.'
// fall through // fall through
...@@ -900,8 +911,7 @@ int yylex(void *arg) ...@@ -900,8 +911,7 @@ int yylex(void *arg)
[(global | local | session) .]variable_name [(global | local | session) .]variable_name
*/ */
while (state_map[c=yyGet()] == STATE_IDENT || while (ident_map[c=yyGet()]) ;
state_map[c] == STATE_NUMBER_IDENT) ;
if (c == '.') if (c == '.')
lex->next_state=STATE_IDENT_SEP; lex->next_state=STATE_IDENT_SEP;
length= (uint) (lex->ptr - lex->tok_start)-1; length= (uint) (lex->ptr - lex->tok_start)-1;
......
...@@ -78,7 +78,7 @@ enum lex_states ...@@ -78,7 +78,7 @@ enum lex_states
STATE_REAL_OR_POINT, STATE_BOOL, STATE_EOL, STATE_ESCAPE, STATE_LONG_COMMENT, STATE_REAL_OR_POINT, STATE_BOOL, STATE_EOL, STATE_ESCAPE, STATE_LONG_COMMENT,
STATE_END_LONG_COMMENT, STATE_COLON, STATE_SET_VAR, STATE_USER_END, STATE_END_LONG_COMMENT, STATE_COLON, STATE_SET_VAR, STATE_USER_END,
STATE_HOSTNAME, STATE_SKIP, STATE_USER_VARIABLE_DELIMITER, STATE_SYSTEM_VAR, STATE_HOSTNAME, STATE_SKIP, STATE_USER_VARIABLE_DELIMITER, STATE_SYSTEM_VAR,
STATE_IDENT_OR_KEYWORD STATE_IDENT_OR_KEYWORD, STATE_IDENT_OR_HEX, STATE_IDENT_OR_BIN
}; };
......
...@@ -1110,6 +1110,12 @@ type: ...@@ -1110,6 +1110,12 @@ type:
$$=FIELD_TYPE_SET; $$=FIELD_TYPE_SET;
} }
| LONG_SYM opt_binary { $$=FIELD_TYPE_MEDIUM_BLOB; } | LONG_SYM opt_binary { $$=FIELD_TYPE_MEDIUM_BLOB; }
| SERIAL_SYM
{
$$=FIELD_TYPE_LONGLONG;
Lex->type|= (AUTO_INCREMENT_FLAG | NOT_NULL_FLAG | UNSIGNED_FLAG |
UNIQUE_FLAG);
}
; ;
char: char:
...@@ -1184,12 +1190,13 @@ attribute: ...@@ -1184,12 +1190,13 @@ attribute:
| DEFAULT literal { Lex->default_value=$2; } | DEFAULT literal { Lex->default_value=$2; }
| AUTO_INC { Lex->type|= AUTO_INCREMENT_FLAG | NOT_NULL_FLAG; } | AUTO_INC { Lex->type|= AUTO_INCREMENT_FLAG | NOT_NULL_FLAG; }
| SERIAL_SYM DEFAULT VALUE_SYM | SERIAL_SYM DEFAULT VALUE_SYM
{ Lex->type|= AUTO_INCREMENT_FLAG | NOT_NULL_FLAG; } { Lex->type|= AUTO_INCREMENT_FLAG | NOT_NULL_FLAG | UNIQUE_FLAG; }
| PRIMARY_SYM KEY_SYM { Lex->type|= PRI_KEY_FLAG | NOT_NULL_FLAG; } | opt_primary KEY_SYM { Lex->type|= PRI_KEY_FLAG | NOT_NULL_FLAG; }
| UNIQUE_SYM { Lex->type|= UNIQUE_FLAG; } | UNIQUE_SYM { Lex->type|= UNIQUE_FLAG; }
| UNIQUE_SYM KEY_SYM { Lex->type|= UNIQUE_KEY_FLAG; } | UNIQUE_SYM KEY_SYM { Lex->type|= UNIQUE_KEY_FLAG; }
| COMMENT_SYM text_literal { Lex->comment= $2; }; | COMMENT_SYM text_literal { Lex->comment= $2; };
charset_name: charset_name:
BINARY BINARY
{ {
...@@ -1227,6 +1234,11 @@ opt_binary: ...@@ -1227,6 +1234,11 @@ opt_binary:
| BINARY { Lex->charset=my_charset_bin; } | BINARY { Lex->charset=my_charset_bin; }
| CHAR_SYM SET charset_name { Lex->charset=$3; } ; | CHAR_SYM SET charset_name { Lex->charset=$3; } ;
/* Optional PRIMARY keyword; matches either nothing or PRIMARY_SYM. */
opt_primary:
/* empty */
| PRIMARY_SYM
;
references: references:
REFERENCES table_ident REFERENCES table_ident
{ {
...@@ -2882,6 +2894,7 @@ fields: ...@@ -2882,6 +2894,7 @@ fields:
insert_values: insert_values:
VALUES values_list {} VALUES values_list {}
| VALUE_SYM values_list {}
| SELECT_SYM | SELECT_SYM
{ {
LEX *lex=Lex; LEX *lex=Lex;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment