MDEV-16117 SP with a single FOR statement creates but further fails to load

The code in the "sp_tail" rule in sql_yacc.yy always used YYLIP->get_cpp_tok_start() as the start of the body, and did not check for possible lookahead which happens for keywords "FOR", "VALUES" and "WITH" for LALR(2) resolution in Lex_input_stream::lex_token(). In case of the lookahead token presence, get_tok_start_prev() should have been used instead of get_cpp_tok_start() as the beginning of the SP body. Change summary: This patch hides the implementation of the lookahead token completely inside Lex_input_stream. The users of Lex_input_stream now just get token-by-token transparently and should not care about lookahead any more. Now external users of Lex_input_stream are not aware of the lookahead token at all. Change details: - Moving Lex_input_stream::has_lookahead() into the "private" section. - Removing Lex_input_stream::get_tok_start_prev() and Lex_input_stream::get_cpp_start_prev(). - Fixing the external code to call get_tok_start() and get_cpp_tok_start() in all places where get_tok_start_prev() and get_cpp_start_prev() where used. - Adding a test for has_lookahead() right inside get_tok_start() and get_cpp_tok_start(). If there is a lookahead token, these methods now return the position of the previous token automatically: const char *get_tok_start() { return has_lookahead() ? m_tok_start_prev : m_tok_start; } const char *get_cpp_tok_start() { return has_lookahead() ? m_cpp_tok_start_prev : m_cpp_tok_start; } - Fixing the internal code inside Lex_input_stream methods to use m_tok_start and m_cpp_tok_start directly, instead of calling get_tok_start() and get_cpp_tok_start(), to make sure to access to the *current* token position (independently of a lookahead token presence).

MDEV-16117 SP with a single FOR statement creates but further fails to load
The code in the "sp_tail" rule in sql_yacc.yy always used YYLIP->get_cpp_tok_start() as the start of the body, and did not check for possible lookahead which happens for keywords "FOR", "VALUES" and "WITH" for LALR(2) resolution in Lex_input_stream::lex_token(). In case of the lookahead token presence, get_tok_start_prev() should have been used instead of get_cpp_tok_start() as the beginning of the SP body. Change summary: This patch hides the implementation of the lookahead token completely inside Lex_input_stream. The users of Lex_input_stream now just get token-by-token transparently and should not care about lookahead any more. Now external users of Lex_input_stream are not aware of the lookahead token at all. Change details: - Moving Lex_input_stream::has_lookahead() into the "private" section. - Removing Lex_input_stream::get_tok_start_prev() and Lex_input_stream::get_cpp_start_prev(). - Fixing the external code to call get_tok_start() and get_cpp_tok_start() in all places where get_tok_start_prev() and get_cpp_start_prev() where used. - Adding a test for has_lookahead() right inside get_tok_start() and get_cpp_tok_start(). If there is a lookahead token, these methods now return the position of the previous token automatically: const char *get_tok_start() { return has_lookahead() ? m_tok_start_prev : m_tok_start; } const char *get_cpp_tok_start() { return has_lookahead() ? m_cpp_tok_start_prev : m_cpp_tok_start; } - Fixing the internal code inside Lex_input_stream methods to use m_tok_start and m_cpp_tok_start directly, instead of calling get_tok_start() and get_cpp_tok_start(), to make sure to access to the *current* token position (independently of a lookahead token presence).
fc63c1e1 · Alexander Barkov · 8b087c63 · fc63c1e1 · fc63c1e1 · fc63c1e1
Commit fc63c1e1 authored May 10, 2018 by Alexander Barkov
6 changed files
--- a/mysql-test/main/sp-bugs.result
+++ b/mysql-test/main/sp-bugs.result
@@ -292,3 +292,53 @@ SELECT a, a+0;
 END;
 $$
 ERROR 22007: Illegal set 'a,b' value found during parsing
+#
+# Start of 10.3 tests
+#
+#
+# MDEV-16117 SP with a single FOR statement creates but further fails to load
+#
+CREATE PROCEDURE p1()
+FOR i IN 1..10 DO
+set @x = 5;
+END FOR;
+$$
+CALL p1;
+SELECT body FROM mysql.proc WHERE db='test' AND specific_name='p1';
+body
+FOR i IN 1..10 DO
+set @x = 5;
+END FOR
+DROP PROCEDURE p1;
+CREATE PROCEDURE p1() WITH t1 AS (SELECT 1) SELECT 1;
+$$
+CALL p1;
+1
+1
+SELECT body FROM mysql.proc WHERE db='test' AND specific_name='p1';
+body
+WITH t1 AS (SELECT 1) SELECT 1
+DROP PROCEDURE p1;
+CREATE PROCEDURE p1() VALUES (1);
+$$
+CALL p1;
+1
+1
+SELECT body FROM mysql.proc WHERE db='test' AND specific_name='p1';
+body
+VALUES (1)
+DROP PROCEDURE p1;
+CREATE FUNCTION f1() RETURNS INT
+FOR i IN 1..10 DO
+RETURN 1;
+END FOR;
+$$
+SELECT f1();
+f1()
+1
+SELECT body FROM mysql.proc WHERE db='test' AND specific_name='f1';
+body
+FOR i IN 1..10 DO
+RETURN 1;
+END FOR
+DROP FUNCTION f1;
--- a/mysql-test/main/sp-bugs.test
+++ b/mysql-test/main/sp-bugs.test
@@ -321,3 +321,53 @@ BEGIN
 END;
 $$
 DELIMITER ;$$
+--echo #
+--echo # Start of 10.3 tests
+--echo #
+--echo #
+--echo # MDEV-16117 SP with a single FOR statement creates but further fails to load
+--echo #
+DELIMITER $$;
+CREATE PROCEDURE p1()
+  FOR i IN 1..10 DO
+      set @x = 5;
+  END FOR;
+$$
+DELIMITER ;$$
+CALL p1;
+SELECT body FROM mysql.proc WHERE db='test' AND specific_name='p1';
+DROP PROCEDURE p1;
+DELIMITER $$;
+CREATE PROCEDURE p1() WITH t1 AS (SELECT 1) SELECT 1;
+$$
+DELIMITER ;$$
+CALL p1;
+SELECT body FROM mysql.proc WHERE db='test' AND specific_name='p1';
+DROP PROCEDURE p1;
+DELIMITER $$;
+CREATE PROCEDURE p1() VALUES (1);
+$$
+DELIMITER ;$$
+CALL p1;
+SELECT body FROM mysql.proc WHERE db='test' AND specific_name='p1';
+DROP PROCEDURE p1;
+DELIMITER $$;
+CREATE FUNCTION f1() RETURNS INT
+  FOR i IN 1..10 DO
+    RETURN 1;
+  END FOR;
+$$
+DELIMITER ;$$
+SELECT f1();
+SELECT body FROM mysql.proc WHERE db='test' AND specific_name='f1';
+DROP FUNCTION f1;
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -4248,16 +4248,8 @@ class THD :public Statement,
  void parse_error(const char *err_text, const char *yytext)
  {
    Lex_input_stream *lip= &m_parser_state->m_lip;
-    if (!yytext)
+    if (!yytext && !(yytext= lip->get_tok_start()))
-    {
-      if (lip->has_lookahead())
-        yytext= lip->get_tok_start_prev();
-      else
-        yytext= lip->get_tok_start();
-      if (!yytext)
        yytext= "";
-    }
    /* Push an error into the error stack */
    ErrConvString err(yytext, strlen(yytext), variables.character_set_client);
    my_printf_error(ER_PARSE_ERROR,  ER_THD(this, ER_PARSE_ERROR), MYF(0),

--- a/sql/sql_lex.cc
+++ b/sql/sql_lex.cc
@@ -841,7 +841,7 @@ Yacc_state::~Yacc_state()
 int Lex_input_stream::find_keyword(Lex_ident_cli_st *kwd,
                                   uint len, bool function)
 {
-  const char *tok= get_tok_start();
+  const char *tok= m_tok_start;
  SYMBOL *symbol= get_hash_symbol(tok, len, function);
  if (symbol)
@@ -957,9 +957,9 @@ LEX_CSTRING Lex_input_stream::get_token(uint skip, uint length)
  LEX_CSTRING tmp;
  yyUnget();                       // ptr points now after last token char
  tmp.length= length;
-  tmp.str= m_thd->strmake(get_tok_start() + skip, tmp.length);
+  tmp.str= m_thd->strmake(m_tok_start + skip, tmp.length);
-  m_cpp_text_start= get_cpp_tok_start() + skip;
+  m_cpp_text_start= m_cpp_tok_start + skip;
  m_cpp_text_end= m_cpp_text_start + tmp.length;
  return tmp;
@@ -1084,7 +1084,7 @@ bool Lex_input_stream::get_text(Lex_string_with_metadata_st *dst, uint sep,
      const char *str, *end;
      char *to;
-      str= get_tok_start();
+      str= m_tok_start;
      end= get_ptr();
      /* Extract the text from the token */
      str += pre_skip;
@@ -1099,7 +1099,7 @@ bool Lex_input_stream::get_text(Lex_string_with_metadata_st *dst, uint sep,
      }
      dst->str= to;
-      m_cpp_text_start= get_cpp_tok_start() + pre_skip;
+      m_cpp_text_start= m_cpp_tok_start + pre_skip;
      m_cpp_text_end= get_cpp_ptr() - post_skip;
      if (!found_escape)
@@ -1972,9 +1972,9 @@ int Lex_input_stream::scan_ident_sysvar(THD *thd, Lex_ident_cli_st *str)
  }
  yyUnget();                       // ptr points now after last token char
-  str->set_ident(get_tok_start(), length, is_8bit);
+  str->set_ident(m_tok_start, length, is_8bit);
-  m_cpp_text_start= get_cpp_tok_start();
+  m_cpp_text_start= m_cpp_tok_start;
  m_cpp_text_end= m_cpp_text_start + length;
  body_utf8_append(m_cpp_text_start);
  body_utf8_append_ident(thd, str, m_cpp_text_end);
@@ -2023,8 +2023,8 @@ int Lex_input_stream::scan_ident_start(THD *thd, Lex_ident_cli_st *str)
  uint length= yyLength();
  yyUnget(); // ptr points now after last token char
-  str->set_ident(get_tok_start(), length, is_8bit);
+  str->set_ident(m_tok_start, length, is_8bit);
-  m_cpp_text_start= get_cpp_tok_start();
+  m_cpp_text_start= m_cpp_tok_start;
  m_cpp_text_end= m_cpp_text_start + length;
  body_utf8_append(m_cpp_text_start);
  body_utf8_append_ident(thd, str, m_cpp_text_end);
@@ -2107,15 +2107,15 @@ int Lex_input_stream::scan_ident_middle(THD *thd, Lex_ident_cli_st *str,
     producing an error.
  */
  DBUG_ASSERT(length > 0);
-  if (resolve_introducer && get_tok_start()[0] == '_')
+  if (resolve_introducer && m_tok_start[0] == '_')
  {
    yyUnget();                       // ptr points now after last token char
-    str->set_ident(get_tok_start(), length, false);
+    str->set_ident(m_tok_start, length, false);
-    m_cpp_text_start= get_cpp_tok_start();
+    m_cpp_text_start= m_cpp_tok_start;
    m_cpp_text_end= m_cpp_text_start + length;
-    body_utf8_append(m_cpp_text_start, get_cpp_tok_start() + length);
+    body_utf8_append(m_cpp_text_start, m_cpp_tok_start + length);
    ErrConvString csname(str->str + 1, str->length - 1, &my_charset_bin);
    CHARSET_INFO *cs= get_charset_by_csname(csname.ptr(),
                                            MY_CS_PRIMARY, MYF(0));
@@ -2128,8 +2128,8 @@ int Lex_input_stream::scan_ident_middle(THD *thd, Lex_ident_cli_st *str,
  }
  yyUnget();                       // ptr points now after last token char
-  str->set_ident(get_tok_start(), length, is_8bit);
+  str->set_ident(m_tok_start, length, is_8bit);
-  m_cpp_text_start= get_cpp_tok_start();
+  m_cpp_text_start= m_cpp_tok_start;
  m_cpp_text_end= m_cpp_text_start + length;
  body_utf8_append(m_cpp_text_start);
  body_utf8_append_ident(thd, str, m_cpp_text_end);
@@ -2165,10 +2165,10 @@ int Lex_input_stream::scan_ident_delimited(THD *thd,
    }
  }
-  str->set_ident_quoted(get_tok_start() + 1, yyLength() - 1, true, quote_char);
+  str->set_ident_quoted(m_tok_start + 1, yyLength() - 1, true, quote_char);
  yyUnget();                       // ptr points now after last token char
-  m_cpp_text_start= get_cpp_tok_start() + 1;
+  m_cpp_text_start= m_cpp_tok_start + 1;
  m_cpp_text_end= m_cpp_text_start + str->length;
  if (c == quote_char)

--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
@@ -2331,8 +2331,6 @@ class Lex_input_stream
    return (uint) ((m_ptr - m_tok_start) - 1);
  }
-public:
  /**
    Test if a lookahead token was already scanned by lex_token(),
    for LALR(2) resolution.
@@ -2342,6 +2340,8 @@ class Lex_input_stream
    return lookahead_token >= 0;
  }
+public:
  /**
    End of file indicator for the query text to parse.
    @return true if there are no more characters to parse
@@ -2372,7 +2372,7 @@ class Lex_input_stream
  /** Get the token start position, in the raw buffer. */
  const char *get_tok_start()
  {
-    return m_tok_start;
+    return has_lookahead() ? m_tok_start_prev : m_tok_start;
  }
  void set_cpp_tok_start(const char *pos)
@@ -2386,28 +2386,16 @@ class Lex_input_stream
    return m_tok_end;
  }
-  /** Get the previous token start position, in the raw buffer. */
-  const char *get_tok_start_prev()
-  {
-    return m_tok_start_prev;
-  }
  /** Get the current stream pointer, in the raw buffer. */
  const char *get_ptr()
  {
    return m_ptr;
  }
-  /** Get the previus token start position, in the pre-processed buffer. */
-  const char *get_cpp_start_prev()
-  {
-    return m_cpp_tok_start_prev;
-  }
  /** Get the token start position, in the pre-processed buffer. */
  const char *get_cpp_tok_start()
  {
-    return m_cpp_tok_start;
+    return has_lookahead() ? m_cpp_tok_start_prev : m_cpp_tok_start;
  }
  /** Get the token end position, in the pre-processed buffer. */

--- a/sql/sql_yacc.yy
+++ b/sql/sql_yacc.yy
@@ -17361,12 +17361,7 @@ trigger_tail:
          FOR_SYM
          remember_name /* $13 */
          { /* $14 */
-            /*
+            Lex->raw_trg_on_table_name_end= YYLIP->get_tok_start();
-              FOR token is already passed through (see 'case FOR_SYM' in sql_lex.cc),
-              so we use _prev() to get it back.
-            */
-            DBUG_ASSERT(YYLIP->has_lookahead());
-            Lex->raw_trg_on_table_name_end= YYLIP->get_tok_start_prev();
          }
          EACH_SYM
          ROW_SYM