Commit 96a301bb authored by Alexander Barkov's avatar Alexander Barkov

MDEV-16020 SP variables inside GROUP BY..WITH ROLLUP break replication

The code passing positions in the query to constructors of
Rewritable_query_parameter descendants (e.g. Item_splocal)
was not reliable. It used various Lex_input_stream methods:
- get_tok_start()
- get_tok_start_prev()
- get_tok_end()
- get_ptr()
to find positions of the recently scanned tokens.

The challenge was mostly to choose between get_tok_start()
and get_tok_start_prev(), taking into account to the current
grammar (depending if lookahead takes place before
or after we read the positions in every particular rule).

But this approach did not work at all in combination
with token contractions, when MYSQLlex() translates
two tokens into one token ID, for example:
   WITH ROLLUP -> WITH_ROLLUP_SYM

As a result, the tokenizer is already one more token ahead.
So in query fragment:

  "GROUP BY d, spvar WITH ROLLUP"

get_tok_start() points to "ROLLUP".
get_tok_start_prev() points to "WITH".

As a result, it was "WITH" who was erroneously replaced
to NAME_CONST() instead of "spvar".

This patch modifies the code to do it a different way.

Changes:

1. For keywords and identifiers, the tokenizer now
returns LEX_CTRING pointing directly to the query
fragment. So query positions are now just available using:
- $1.str           - for the beginning of a token
- $1.str+$1.length - for the end of a token

2. Identifiers are not allocated on the THD memory root
in the tokenizer any more. Allocation is now done
on later stages, in methods like LEX::create_item_ident().

3. Two LEX_CSTRING based structures were added:
- Lex_ident_cli_st - used to store the "client side"
  identifier representation, pointing to the
  query fragment. Note, these identifiers
  are encoded in @@character_set_client
  and can have broken byte sequences.

- Lex_ident_sys_st - used to store the "server side"
  identifier representation, pointing to the
  THD allocated memory. This representation
  guarantees that the identifier was checked
  for being well-formed, and is encoded in utf8.

4. To distinguish between two identifier types
   in the grammar, two Bison types were added:
   <ident_cli> and <ident_sys>

5. All non-reserved keywords were marked as
   being of the type <ident_cli>.
   All reserved keywords are still of the type NONE.

6. All curly brackets in rules collecting
   non-reserved keywords into non-terminal
   symbols were removed, e.g.:

   Was:

       keyword_sp_data_type:
         BIT_SYM           {}
       | BOOLEAN_SYM       {}

   Now:

       keyword_sp_data_type:
         BIT_SYM
       | BOOLEAN_SYM

  This is important NOT to have brackets here!!!!
  This is needed to make sure that the underlying
  Lex_ident_cli_ststructure correctly passes up to
  the calling rule.

6. The code to scan identifiers and keywords
  was moved from lex_one_token() into new
  Lex_input_stream methods:

   scan_ident_sysvar()
   scan_ident_start()
   scan_ident_middle()
   scan_ident_delimited()

  This was done to:
  - get rid of enormous amount of references to &yylval->lex_str
  - and remove a lot of references like lip->xxx

7. The allocating functionality which puts identifiers on the
   THD memory root now resides in methods of Lex_ident_sys_st,
   and in THD::to_ident_sys_alloc().
   get_quoted_token() was removed.

8. Cleanup: check_simple_select() was moved as a method to LEX.

9. Cleanup: Some more functionality was moved from *.yy
   to new methods were added to LEX:
     make_item_colon_ident_ident()
     make_item_func_call_generic()
     create_item_qualified_asterisk()
parent 6c5e60f1
......@@ -84,3 +84,91 @@ master-bin.000001 # Gtid # # GTID #-#-#
master-bin.000001 # Query # # use `test`; DROP TABLE `t1` /* generated by server */
master-bin.000001 # Gtid # # GTID #-#-#
master-bin.000001 # Query # # use `test`; DROP PROCEDURE p1
#
# MDEV-16020 SP variables inside GROUP BY..WITH ROLLUP break replication
#
FLUSH LOGS;
CREATE TABLE t1 (d DATE);
INSERT INTO t1 VALUES ('1985-05-13'),('1989-12-24');
CREATE TABLE t2 (d DATE, c BIGINT);
BEGIN NOT ATOMIC
BEGIN
DECLARE var INT DEFAULT 10;
INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, var;
INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, var WITH ROLLUP;
END;
BEGIN
DECLARE atomic INT DEFAULT 20;
INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, atomic;
INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, atomic WITH ROLLUP;
INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, `atomic`;
INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, `atomic` WITH ROLLUP;
END;
BEGIN
DECLARE atomic ROW (atomic INT, xxx INT) DEFAULT (31,32);
INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, atomic.atomic;
INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, atomic.atomic WITH ROLLUP;
INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, `atomic`.`atomic`;
INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, `atomic`.`atomic` WITH ROLLUP;
INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, atomic.xxx;
INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, atomic.xxx WITH ROLLUP;
INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, `atomic`.`xxx`;
INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, `atomic`.`xxx` WITH ROLLUP;
END;
END;
$$
DROP TABLE t1,t2;
include/show_binlog_events.inc
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000002 # Binlog_checkpoint # # master-bin.000002
master-bin.000002 # Gtid # # GTID #-#-#
master-bin.000002 # Query # # use `test`; CREATE TABLE t1 (d DATE)
master-bin.000002 # Gtid # # BEGIN GTID #-#-#
master-bin.000002 # Query # # use `test`; INSERT INTO t1 VALUES ('1985-05-13'),('1989-12-24')
master-bin.000002 # Query # # COMMIT
master-bin.000002 # Gtid # # GTID #-#-#
master-bin.000002 # Query # # use `test`; CREATE TABLE t2 (d DATE, c BIGINT)
master-bin.000002 # Gtid # # BEGIN GTID #-#-#
master-bin.000002 # Query # # use `test`; INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, NAME_CONST('var',10)
master-bin.000002 # Query # # COMMIT
master-bin.000002 # Gtid # # BEGIN GTID #-#-#
master-bin.000002 # Query # # use `test`; INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, NAME_CONST('var',10) WITH ROLLUP
master-bin.000002 # Query # # COMMIT
master-bin.000002 # Gtid # # BEGIN GTID #-#-#
master-bin.000002 # Query # # use `test`; INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, NAME_CONST('atomic',20)
master-bin.000002 # Query # # COMMIT
master-bin.000002 # Gtid # # BEGIN GTID #-#-#
master-bin.000002 # Query # # use `test`; INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, NAME_CONST('atomic',20) WITH ROLLUP
master-bin.000002 # Query # # COMMIT
master-bin.000002 # Gtid # # BEGIN GTID #-#-#
master-bin.000002 # Query # # use `test`; INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, NAME_CONST('atomic',20)
master-bin.000002 # Query # # COMMIT
master-bin.000002 # Gtid # # BEGIN GTID #-#-#
master-bin.000002 # Query # # use `test`; INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, NAME_CONST('atomic',20) WITH ROLLUP
master-bin.000002 # Query # # COMMIT
master-bin.000002 # Gtid # # BEGIN GTID #-#-#
master-bin.000002 # Query # # use `test`; INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, NAME_CONST('atomic.atomic',31)
master-bin.000002 # Query # # COMMIT
master-bin.000002 # Gtid # # BEGIN GTID #-#-#
master-bin.000002 # Query # # use `test`; INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, NAME_CONST('atomic.atomic',31) WITH ROLLUP
master-bin.000002 # Query # # COMMIT
master-bin.000002 # Gtid # # BEGIN GTID #-#-#
master-bin.000002 # Query # # use `test`; INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, NAME_CONST('atomic.atomic',31)
master-bin.000002 # Query # # COMMIT
master-bin.000002 # Gtid # # BEGIN GTID #-#-#
master-bin.000002 # Query # # use `test`; INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, NAME_CONST('atomic.atomic',31) WITH ROLLUP
master-bin.000002 # Query # # COMMIT
master-bin.000002 # Gtid # # BEGIN GTID #-#-#
master-bin.000002 # Query # # use `test`; INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, NAME_CONST('atomic.xxx',32)
master-bin.000002 # Query # # COMMIT
master-bin.000002 # Gtid # # BEGIN GTID #-#-#
master-bin.000002 # Query # # use `test`; INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, NAME_CONST('atomic.xxx',32) WITH ROLLUP
master-bin.000002 # Query # # COMMIT
master-bin.000002 # Gtid # # BEGIN GTID #-#-#
master-bin.000002 # Query # # use `test`; INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, NAME_CONST('atomic.xxx',32)
master-bin.000002 # Query # # COMMIT
master-bin.000002 # Gtid # # BEGIN GTID #-#-#
master-bin.000002 # Query # # use `test`; INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, NAME_CONST('atomic.xxx',32) WITH ROLLUP
master-bin.000002 # Query # # COMMIT
master-bin.000002 # Gtid # # GTID #-#-#
master-bin.000002 # Query # # use `test`; DROP TABLE `t1`,`t2` /* generated by server */
......@@ -39,3 +39,44 @@ DROP PROCEDURE p1;
--let $binlog_file = LAST
source include/show_binlog_events.inc;
--echo #
--echo # MDEV-16020 SP variables inside GROUP BY..WITH ROLLUP break replication
--echo #
FLUSH LOGS;
CREATE TABLE t1 (d DATE);
INSERT INTO t1 VALUES ('1985-05-13'),('1989-12-24');
CREATE TABLE t2 (d DATE, c BIGINT);
DELIMITER $$;
BEGIN NOT ATOMIC
BEGIN
DECLARE var INT DEFAULT 10;
INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, var;
INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, var WITH ROLLUP;
END;
BEGIN
DECLARE atomic INT DEFAULT 20;
INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, atomic;
INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, atomic WITH ROLLUP;
INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, `atomic`;
INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, `atomic` WITH ROLLUP;
END;
BEGIN
DECLARE atomic ROW (atomic INT, xxx INT) DEFAULT (31,32);
INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, atomic.atomic;
INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, atomic.atomic WITH ROLLUP;
INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, `atomic`.`atomic`;
INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, `atomic`.`atomic` WITH ROLLUP;
INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, atomic.xxx;
INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, atomic.xxx WITH ROLLUP;
INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, `atomic`.`xxx`;
INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, `atomic`.`xxx` WITH ROLLUP;
END;
END;
$$
DELIMITER ;$$
DROP TABLE t1,t2;
--let $binlog_file = LAST
source include/show_binlog_events.inc;
......@@ -475,3 +475,36 @@ master-bin.000005 # Gtid # # GTID #-#-#
master-bin.000005 # Query # # use `test`; DROP TABLE "t2" /* generated by server */
master-bin.000005 # Gtid # # GTID #-#-#
master-bin.000005 # Query # # use `test`; DROP PROCEDURE p1
#
# MDEV-16020 SP variables inside GROUP BY..WITH ROLLUP break replication
#
FLUSH LOGS;
CREATE TABLE t1 (d DATE);
INSERT INTO t1 VALUES ('1985-05-13'),('1989-12-24');
CREATE TABLE t2 (d DATE, c BIGINT);
DECLARE
var INT;
BEGIN
INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, var;
INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, var WITH ROLLUP;
END;
$$
DROP TABLE t1,t2;
include/show_binlog_events.inc
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000006 # Binlog_checkpoint # # master-bin.000006
master-bin.000006 # Gtid # # GTID #-#-#
master-bin.000006 # Query # # use `test`; CREATE TABLE t1 (d DATE)
master-bin.000006 # Gtid # # BEGIN GTID #-#-#
master-bin.000006 # Query # # use `test`; INSERT INTO t1 VALUES ('1985-05-13'),('1989-12-24')
master-bin.000006 # Query # # COMMIT
master-bin.000006 # Gtid # # GTID #-#-#
master-bin.000006 # Query # # use `test`; CREATE TABLE t2 (d DATE, c BIGINT)
master-bin.000006 # Gtid # # BEGIN GTID #-#-#
master-bin.000006 # Query # # use `test`; INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, NAME_CONST('var',NULL)
master-bin.000006 # Query # # COMMIT
master-bin.000006 # Gtid # # BEGIN GTID #-#-#
master-bin.000006 # Query # # use `test`; INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, NAME_CONST('var',NULL) WITH ROLLUP
master-bin.000006 # Query # # COMMIT
master-bin.000006 # Gtid # # GTID #-#-#
master-bin.000006 # Query # # use `test`; DROP TABLE "t1","t2" /* generated by server */
......@@ -194,3 +194,26 @@ DROP TABLE t2;
DROP PROCEDURE p1;
--let $binlog_file = LAST
source include/show_binlog_events.inc;
--echo #
--echo # MDEV-16020 SP variables inside GROUP BY..WITH ROLLUP break replication
--echo #
FLUSH LOGS;
CREATE TABLE t1 (d DATE);
INSERT INTO t1 VALUES ('1985-05-13'),('1989-12-24');
CREATE TABLE t2 (d DATE, c BIGINT);
DELIMITER $$;
DECLARE
var INT;
BEGIN
INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, var;
INSERT INTO t2 SELECT d, COUNT(*) FROM t1 GROUP BY d, var WITH ROLLUP;
END;
$$
DELIMITER ;$$
DROP TABLE t1,t2;
--let $binlog_file = LAST
source include/show_binlog_events.inc;
......@@ -2498,6 +2498,39 @@ bool THD::convert_string(String *s, CHARSET_INFO *from_cs, CHARSET_INFO *to_cs)
}
bool THD::check_string_for_wellformedness(const char *str,
size_t length,
CHARSET_INFO *cs) const
{
DBUG_ASSERT(charset_is_system_charset);
size_t wlen= Well_formed_prefix(cs, str, length).length();
if (wlen < length)
{
ErrConvString err(str, length, &my_charset_bin);
my_error(ER_INVALID_CHARACTER_STRING, MYF(0), cs->csname, err.ptr());
return true;
}
return false;
}
bool THD::to_ident_sys_alloc(Lex_ident_sys_st *to, const Lex_ident_cli_st *ident)
{
if (ident->is_quoted())
{
LEX_CSTRING unquoted;
if (quote_unescape(&unquoted, ident, ident->quote()))
return true;
return charset_is_system_charset ?
to->copy_sys(this, &unquoted) :
to->convert(this, &unquoted, charset());
}
return charset_is_system_charset ?
to->copy_sys(this, ident) :
to->copy_or_convert(this, ident, charset());
}
Item_basic_constant *
THD::make_string_literal(const char *str, size_t length, uint repertoire)
{
......
......@@ -3658,6 +3658,26 @@ class THD :public Statement,
lex_str->length= length;
return lex_str;
}
// Remove double quotes: aaa""bbb -> aaa"bbb
bool quote_unescape(LEX_CSTRING *dst, const LEX_CSTRING *src, char quote)
{
const char *tmp= src->str;
const char *tmpend= src->str + src->length;
char *to;
if (!(dst->str= to= (char *) alloc(src->length + 1)))
{
dst->length= 0; // Safety
return true;
}
for ( ; tmp < tmpend; )
{
if ((*to++= *tmp++) == quote)
tmp++; // Skip double quotes
}
*to= 0; // End null for safety
dst->length= to - dst->str;
return false;
}
LEX_CSTRING *make_clex_string(const char* str, size_t length)
{
......@@ -3701,7 +3721,6 @@ class THD :public Statement,
bool convert_with_error(CHARSET_INFO *dstcs, LEX_STRING *dst,
CHARSET_INFO *srccs,
const char *src, size_t src_length);
/*
If either "dstcs" or "srccs" is &my_charset_bin,
then performs native copying using cs->cset->copy_fix().
......@@ -3720,6 +3739,17 @@ class THD :public Statement,
bool convert_string(String *s, CHARSET_INFO *from_cs, CHARSET_INFO *to_cs);
/*
Check if the string is wellformed, raise an error if not wellformed.
@param str - The string to check.
@param length - the string length.
*/
bool check_string_for_wellformedness(const char *str,
size_t length,
CHARSET_INFO *cs) const;
bool to_ident_sys_alloc(Lex_ident_sys_st *to, const Lex_ident_cli_st *from);
/*
Create a string literal with optional client->connection conversion.
@param str - the string in the client character set
......@@ -3827,7 +3857,7 @@ class THD :public Statement,
void set_stmt_da(Diagnostics_area *da)
{ m_stmt_da= da; }
inline CHARSET_INFO *charset() { return variables.character_set_client; }
inline CHARSET_INFO *charset() const { return variables.character_set_client; }
void update_charset();
void update_charset(CHARSET_INFO *character_set_client,
CHARSET_INFO *collation_connection)
......
This diff is collapsed.
This diff is collapsed.
......@@ -9059,32 +9059,6 @@ bool append_file_to_dir(THD *thd, const char **filename_ptr,
}
/**
Check if the select is a simple select (not an union).
@retval
0 ok
@retval
1 error ; In this case the error messege is sent to the client
*/
bool check_simple_select()
{
THD *thd= current_thd;
LEX *lex= thd->lex;
if (lex->current_select != &lex->select_lex)
{
char command[80];
Lex_input_stream *lip= & thd->m_parser_state->m_lip;
strmake(command, lip->yylval->symbol.str,
MY_MIN(lip->yylval->symbol.length, sizeof(command)-1));
my_error(ER_CANT_USE_OPTION_HERE, MYF(0), command);
return 1;
}
return 0;
}
Comp_creator *comp_eq_creator(bool invert)
{
return invert?(Comp_creator *)&ne_creator:(Comp_creator *)&eq_creator;
......
......@@ -119,7 +119,6 @@ bool push_new_name_resolution_context(THD *thd,
TABLE_LIST *left_op,
TABLE_LIST *right_op);
void init_update_queries(void);
bool check_simple_select();
Item *normalize_cond(THD *thd, Item *cond);
Item *negate_expression(THD *thd, Item *expr);
bool check_stack_overrun(THD *thd, long margin, uchar *dummy);
......
This diff is collapsed.
This diff is collapsed.
......@@ -754,12 +754,6 @@ class Lex_trim: public Lex_trim_st
};
struct Lex_string_with_pos_st: public LEX_CSTRING
{
const char *m_pos;
};
class Load_data_param
{
protected:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment