Commit 1d30a23f authored by Alexander Barkov

Moving a few static functions in sql_lex.cc to new methods in Lex_input_stream

Reasoning:
- Shorter and clearer code
- Better encapsulation
  (a fair number of Lex_input_stream methods and members were
   moved to the private section)

New methods:

  int lex_token(union YYSTYPE *yylval, THD *thd);
  bool consume_comment(int remaining_recursions_permitted);
  int lex_one_token(union YYSTYPE *yylval, THD *thd);
  int find_keyword(Lex_ident_cli_st *str, uint len, bool function);
  LEX_CSTRING get_token(uint skip, uint length);

Additional changes:

- Removing Lex_input_stream::yylval.
  In the original code it was just an alias
  for the "yylval" passed to lex_one_token().
  This coding style is bug-prone and hard to follow.
  In the new version, "yylval" (or its components) is passed to
  the affected methods as a parameter.
- Moving the code in sql_lex.h up and down between "private" and "public"
  sections (sorry if this made the diff somewhat harder to read)
parent 971268dc
...@@ -4250,7 +4250,7 @@ class THD :public Statement, ...@@ -4250,7 +4250,7 @@ class THD :public Statement,
Lex_input_stream *lip= &m_parser_state->m_lip; Lex_input_stream *lip= &m_parser_state->m_lip;
if (!yytext) if (!yytext)
{ {
if (lip->lookahead_token >= 0) if (lip->has_lookahead())
yytext= lip->get_tok_start_prev(); yytext= lip->get_tok_start_prev();
else else
yytext= lip->get_tok_start(); yytext= lip->get_tok_start();
......
This diff is collapsed.
...@@ -2145,6 +2145,16 @@ class Lex_input_stream ...@@ -2145,6 +2145,16 @@ class Lex_input_stream
void reset(char *buff, size_t length); void reset(char *buff, size_t length);
/**
The main method to scan the next token, with token contraction processing
for LALR(2) resolution, e.g. translate "WITH" followed by "ROLLUP"
to a single token WITH_ROLLUP_SYM.
*/
int lex_token(union YYSTYPE *yylval, THD *thd);
void reduce_digest_token(uint token_left, uint token_right);
private:
/** /**
Set the echo mode. Set the echo mode.
...@@ -2272,15 +2282,6 @@ class Lex_input_stream ...@@ -2272,15 +2282,6 @@ class Lex_input_stream
return m_ptr; return m_ptr;
} }
/**
End of file indicator for the query text to parse.
@return true if there are no more characters to parse
*/
bool eof()
{
return (m_ptr >= m_end_of_query);
}
/** /**
End of file indicator for the query text to parse. End of file indicator for the query text to parse.
@param n number of characters expected @param n number of characters expected
...@@ -2291,24 +2292,6 @@ class Lex_input_stream ...@@ -2291,24 +2292,6 @@ class Lex_input_stream
return ((m_ptr + n) >= m_end_of_query); return ((m_ptr + n) >= m_end_of_query);
} }
/** Get the raw query buffer. */
const char *get_buf()
{
return m_buf;
}
/** Get the pre-processed query buffer. */
const char *get_cpp_buf()
{
return m_cpp_buf;
}
/** Get the end of the raw query buffer. */
const char *get_end_of_query()
{
return m_end_of_query;
}
/** Mark the stream position as the start of a new token. */ /** Mark the stream position as the start of a new token. */
void start_token() void start_token()
{ {
...@@ -2331,6 +2314,61 @@ class Lex_input_stream ...@@ -2331,6 +2314,61 @@ class Lex_input_stream
m_cpp_tok_start= m_cpp_ptr; m_cpp_tok_start= m_cpp_ptr;
} }
/**
Get the maximum length of the utf8-body buffer.
The utf8 body can grow because of the character set conversion and escaping.
*/
size_t get_body_utf8_maximum_length(THD *thd);
/** Get the length of the current token, in the raw buffer. */
uint yyLength()
{
/*
The assumption is that the lexical analyser is always 1 character ahead,
which the -1 accounts for.
*/
DBUG_ASSERT(m_ptr > m_tok_start);
return (uint) ((m_ptr - m_tok_start) - 1);
}
public:
/**
Test if a lookahead token was already scanned by lex_token(),
for LALR(2) resolution.
*/
bool has_lookahead() const
{
return lookahead_token >= 0;
}
/**
End of file indicator for the query text to parse.
@return true if there are no more characters to parse
*/
bool eof()
{
return (m_ptr >= m_end_of_query);
}
/** Get the raw query buffer. */
const char *get_buf()
{
return m_buf;
}
/** Get the pre-processed query buffer. */
const char *get_cpp_buf()
{
return m_cpp_buf;
}
/** Get the end of the raw query buffer. */
const char *get_end_of_query()
{
return m_end_of_query;
}
/** Get the token start position, in the raw buffer. */ /** Get the token start position, in the raw buffer. */
const char *get_tok_start() const char *get_tok_start()
{ {
...@@ -2360,17 +2398,6 @@ class Lex_input_stream ...@@ -2360,17 +2398,6 @@ class Lex_input_stream
return m_ptr; return m_ptr;
} }
/** Get the length of the current token, in the raw buffer. */
uint yyLength()
{
/*
The assumption is that the lexical analyser is always 1 character ahead,
which the -1 accounts for.
*/
DBUG_ASSERT(m_ptr > m_tok_start);
return (uint) ((m_ptr - m_tok_start) - 1);
}
/** Get the previous token start position, in the pre-processed buffer. */ /** Get the previous token start position, in the pre-processed buffer. */
const char *get_cpp_start_prev() const char *get_cpp_start_prev()
{ {
...@@ -2434,12 +2461,6 @@ class Lex_input_stream ...@@ -2434,12 +2461,6 @@ class Lex_input_stream
return (size_t) (m_body_utf8_ptr - m_body_utf8); return (size_t) (m_body_utf8_ptr - m_body_utf8);
} }
/**
Get the maximum length of the utf8-body buffer.
The utf8 body can grow because of the character set conversion and escaping.
*/
size_t get_body_utf8_maximum_length(THD *thd);
void body_utf8_start(THD *thd, const char *begin_ptr); void body_utf8_start(THD *thd, const char *begin_ptr);
void body_utf8_append(const char *ptr); void body_utf8_append(const char *ptr);
void body_utf8_append(const char *ptr, const char *end_ptr); void body_utf8_append(const char *ptr, const char *end_ptr);
...@@ -2451,15 +2472,8 @@ class Lex_input_stream ...@@ -2451,15 +2472,8 @@ class Lex_input_stream
CHARSET_INFO *txt_cs, CHARSET_INFO *txt_cs,
const char *end_ptr, const char *end_ptr,
my_wc_t sep); my_wc_t sep);
/** Current thread. */
THD *m_thd;
/** Current line number. */
uint yylineno;
/** Interface with bison, value of the last token parsed. */
LEX_YYSTYPE yylval;
private:
/** /**
LALR(2) resolution, look ahead token. LALR(2) resolution, look ahead token.
Value of the next token to return, if any, Value of the next token to return, if any,
...@@ -2476,15 +2490,20 @@ class Lex_input_stream ...@@ -2476,15 +2490,20 @@ class Lex_input_stream
void add_digest_token(uint token, LEX_YYSTYPE yylval); void add_digest_token(uint token, LEX_YYSTYPE yylval);
void reduce_digest_token(uint token_left, uint token_right); bool consume_comment(int remaining_recursions_permitted);
int lex_one_token(union YYSTYPE *yylval, THD *thd);
int find_keyword(Lex_ident_cli_st *str, uint len, bool function);
LEX_CSTRING get_token(uint skip, uint length);
int scan_ident_sysvar(THD *thd, Lex_ident_cli_st *str); int scan_ident_sysvar(THD *thd, Lex_ident_cli_st *str);
int scan_ident_start(THD *thd, Lex_ident_cli_st *str); int scan_ident_start(THD *thd, Lex_ident_cli_st *str);
int scan_ident_middle(THD *thd, Lex_ident_cli_st *str, int scan_ident_middle(THD *thd, Lex_ident_cli_st *str,
CHARSET_INFO **cs, my_lex_states *); CHARSET_INFO **cs, my_lex_states *);
int scan_ident_delimited(THD *thd, Lex_ident_cli_st *str); int scan_ident_delimited(THD *thd, Lex_ident_cli_st *str);
bool get_7bit_or_8bit_ident(THD *thd, uchar *last_char); bool get_7bit_or_8bit_ident(THD *thd, uchar *last_char);
private:
/** Current thread. */
THD *m_thd;
/** Pointer to the current position in the raw input stream. */ /** Pointer to the current position in the raw input stream. */
char *m_ptr; char *m_ptr;
...@@ -2570,6 +2589,15 @@ class Lex_input_stream ...@@ -2570,6 +2589,15 @@ class Lex_input_stream
*/ */
bool multi_statements; bool multi_statements;
/** Current line number. */
uint yylineno;
/**
Current statement digest instrumentation.
*/
sql_digest_state* m_digest;
private:
/** State of the lexical analyser for comments. */ /** State of the lexical analyser for comments. */
enum_comment_state in_comment; enum_comment_state in_comment;
enum_comment_state in_comment_saved; enum_comment_state in_comment_saved;
...@@ -2596,13 +2624,9 @@ class Lex_input_stream ...@@ -2596,13 +2624,9 @@ class Lex_input_stream
NOTE: this member must be used within MYSQLlex() function only. NOTE: this member must be used within MYSQLlex() function only.
*/ */
CHARSET_INFO *m_underscore_cs; CHARSET_INFO *m_underscore_cs;
/**
Current statement digest instrumentation.
*/
sql_digest_state* m_digest;
}; };
/** /**
Abstract representation of a statement. Abstract representation of a statement.
This class is an interface between the parser and the runtime. This class is an interface between the parser and the runtime.
......
...@@ -17365,7 +17365,7 @@ trigger_tail: ...@@ -17365,7 +17365,7 @@ trigger_tail:
FOR token is already passed through (see 'case FOR_SYM' in sql_lex.cc), FOR token is already passed through (see 'case FOR_SYM' in sql_lex.cc),
so we use _prev() to get it back. so we use _prev() to get it back.
*/ */
DBUG_ASSERT(YYLIP->lookahead_token >= 0); DBUG_ASSERT(YYLIP->has_lookahead());
Lex->raw_trg_on_table_name_end= YYLIP->get_tok_start_prev(); Lex->raw_trg_on_table_name_end= YYLIP->get_tok_start_prev();
} }
EACH_SYM EACH_SYM
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment