Commit 50eee605 authored by Alexander Barkov

Preparatory refactoring for:

MDEV-6218 Wrong result of CHAR_LENGTH(non-BMP-character) with 3-byte utf8
- Moving get_text() as a method to Lex_input_stream.
- Moving the unescaping part into a separate function,
  this piece of code will later go to /strings most likely.
- Removing Lex_input_stream::yytoklen, as it's not needed any more.
parent 01d7da67
...@@ -2960,6 +2960,10 @@ public: ...@@ -2960,6 +2960,10 @@ public:
return (bool) (variables.sql_mode & (MODE_STRICT_TRANS_TABLES | return (bool) (variables.sql_mode & (MODE_STRICT_TRANS_TABLES |
MODE_STRICT_ALL_TABLES)); MODE_STRICT_ALL_TABLES));
} }
/**
  Tell whether backslash is treated as an escape character.

  @return true unless MODE_NO_BACKSLASH_ESCAPES is set in variables.sql_mode.
*/
inline bool backslash_escapes() const
{
  return (variables.sql_mode & MODE_NO_BACKSLASH_ESCAPES) == 0;
}
inline my_time_t query_start() { query_start_used=1; return start_time; } inline my_time_t query_start() { query_start_used=1; return start_time; }
inline ulong query_start_sec_part() inline ulong query_start_sec_part()
{ query_start_sec_part_used=1; return start_time_sec_part; } { query_start_sec_part_used=1; return start_time_sec_part; }
......
...@@ -281,7 +281,6 @@ void ...@@ -281,7 +281,6 @@ void
Lex_input_stream::reset(char *buffer, unsigned int length) Lex_input_stream::reset(char *buffer, unsigned int length)
{ {
yylineno= 1; yylineno= 1;
yytoklen= 0;
yylval= NULL; yylval= NULL;
lookahead_token= -1; lookahead_token= -1;
lookahead_yylval= NULL; lookahead_yylval= NULL;
...@@ -641,7 +640,7 @@ static LEX_STRING get_token(Lex_input_stream *lip, uint skip, uint length) ...@@ -641,7 +640,7 @@ static LEX_STRING get_token(Lex_input_stream *lip, uint skip, uint length)
{ {
LEX_STRING tmp; LEX_STRING tmp;
lip->yyUnget(); // ptr points now after last token char lip->yyUnget(); // ptr points now after last token char
tmp.length=lip->yytoklen=length; tmp.length= length;
tmp.str= lip->m_thd->strmake(lip->get_tok_start() + skip, tmp.length); tmp.str= lip->m_thd->strmake(lip->get_tok_start() + skip, tmp.length);
lip->m_cpp_text_start= lip->get_cpp_tok_start() + skip; lip->m_cpp_text_start= lip->get_cpp_tok_start() + skip;
...@@ -665,7 +664,7 @@ static LEX_STRING get_quoted_token(Lex_input_stream *lip, ...@@ -665,7 +664,7 @@ static LEX_STRING get_quoted_token(Lex_input_stream *lip,
const char *from, *end; const char *from, *end;
char *to; char *to;
lip->yyUnget(); // ptr points now after last token char lip->yyUnget(); // ptr points now after last token char
tmp.length= lip->yytoklen=length; tmp.length= length;
tmp.str=(char*) lip->m_thd->alloc(tmp.length+1); tmp.str=(char*) lip->m_thd->alloc(tmp.length+1);
from= lip->get_tok_start() + skip; from= lip->get_tok_start() + skip;
to= tmp.str; to= tmp.str;
...@@ -687,135 +686,152 @@ static LEX_STRING get_quoted_token(Lex_input_stream *lip, ...@@ -687,135 +686,152 @@ static LEX_STRING get_quoted_token(Lex_input_stream *lip,
} }
/**
  Copy the text in [str, end) into "to", resolving escape sequences and
  collapsing doubled separators, and 0-terminate the result.

  @param cs                 Character set of the text. Consulted only when
                            USE_MB is defined, in which case multi-byte
                            characters are copied through unchanged.
  @param to                 Destination buffer. Every branch writes at most as
                            many bytes as it consumes, so (end - str) + 1 bytes
                            are sufficient.
  @param str                First byte of the text to unescape.
  @param end                One past the last byte of the text.
  @param sep                Separator (quote) character; a separator followed
                            by another byte is emitted once and the following
                            byte is skipped.
  @param backslash_escapes  When true, backslash sequences are decoded;
                            when false, backslashes are copied literally.

  @return Number of bytes written to "to", not counting the terminating 0.
*/
static size_t
my_unescape(CHARSET_INFO *cs, char *to, const char *str, const char *end,
            int sep, bool backslash_escapes)
{
  char *start= to;
  for ( ; str != end ; str++)
  {
#ifdef USE_MB
    int l;
    if (use_mb(cs) && (l= my_ismbchar(cs, str, end)))
    {
      /* Copy the whole multi-byte character verbatim */
      while (l--)
        *to++ = *str++;
      str--;                            // Compensate for the loop's str++
      continue;
    }
#endif
    /* A backslash that is the very last byte is copied literally below */
    if (backslash_escapes && *str == '\\' && str + 1 != end)
    {
      switch(*++str) {
      case 'n':
        *to++='\n';
        break;
      case 't':
        *to++= '\t';
        break;
      case 'r':
        *to++ = '\r';
        break;
      case 'b':
        *to++ = '\b';
        break;
      case '0':
        *to++= 0;                       // Ascii null
        break;
      case 'Z':                         // ^Z must be escaped on Win32
        *to++='\032';
        break;
      case '_':
      case '%':
        *to++= '\\';                    // remember prefix for wildcard
        /* Fall through */
      default:
        *to++= *str;
        break;
      }
    }
    else if (*str == sep)
    {
      *to++= *str;                      // Two ' or "
      /*
        Skip the second separator of the pair. A lone separator in the
        last position has nothing after it: stepping over it would move
        str past "end" and the "str != end" loop test would never stop.
      */
      if (str + 1 != end)
        str++;
    }
    else
      *to++ = *str;
  }
  *to= 0;
  return to - start;
}
/**
  Unescape the text in [str, end) into "to" by delegating to my_unescape().

  Whether backslash sequences are decoded is taken from
  m_thd->backslash_escapes(); all other arguments are passed through as-is.

  @return The value returned by my_unescape().
*/
size_t
Lex_input_stream::unescape(CHARSET_INFO *cs, char *to,
                           const char *str, const char *end,
                           int sep)
{
  const bool decode_backslashes= m_thd->backslash_escapes();
  return my_unescape(cs, to, str, end, sep, decode_backslashes);
}
/* /*
Return an unescaped text literal without quotes Return an unescaped text literal without quotes
Fix sometimes to do only one scan of the string Fix sometimes to do only one scan of the string
*/ */
static char *get_text(Lex_input_stream *lip, int pre_skip, int post_skip) bool Lex_input_stream::get_text(LEX_STRING *dst, int pre_skip, int post_skip)
{ {
reg1 uchar c,sep; reg1 uchar c,sep;
uint found_escape=0; uint found_escape=0;
CHARSET_INFO *cs= lip->m_thd->charset(); CHARSET_INFO *cs= m_thd->charset();
lip->tok_bitmap= 0; tok_bitmap= 0;
sep= lip->yyGetLast(); // String should end with this sep= yyGetLast(); // String should end with this
while (! lip->eof()) while (! eof())
{ {
c= lip->yyGet(); c= yyGet();
lip->tok_bitmap|= c; tok_bitmap|= c;
#ifdef USE_MB #ifdef USE_MB
{ {
int l; int l;
if (use_mb(cs) && if (use_mb(cs) &&
(l = my_ismbchar(cs, (l = my_ismbchar(cs,
lip->get_ptr() -1, get_ptr() -1,
lip->get_end_of_query()))) { get_end_of_query()))) {
lip->skip_binary(l-1); skip_binary(l-1);
continue; continue;
} }
} }
#endif #endif
if (c == '\\' && if (c == '\\' &&
!(lip->m_thd->variables.sql_mode & MODE_NO_BACKSLASH_ESCAPES)) !(m_thd->variables.sql_mode & MODE_NO_BACKSLASH_ESCAPES))
{ // Escaped character { // Escaped character
found_escape=1; found_escape=1;
if (lip->eof()) if (eof())
return 0; return true;
lip->yySkip(); yySkip();
} }
else if (c == sep) else if (c == sep)
{ {
if (c == lip->yyGet()) // Check if two separators in a row if (c == yyGet()) // Check if two separators in a row
{ {
found_escape=1; // duplicate. Remember for delete found_escape=1; // duplicate. Remember for delete
continue; continue;
} }
else else
lip->yyUnget(); yyUnget();
/* Found end. Unescape and return string */ /* Found end. Unescape and return string */
const char *str, *end; const char *str, *end;
char *start;
str= lip->get_tok_start(); str= get_tok_start();
end= lip->get_ptr(); end= get_ptr();
/* Extract the text from the token */ /* Extract the text from the token */
str += pre_skip; str += pre_skip;
end -= post_skip; end -= post_skip;
DBUG_ASSERT(end >= str); DBUG_ASSERT(end >= str);
if (!(start= (char*) lip->m_thd->alloc((uint) (end-str)+1))) if (!(dst->str= (char*) m_thd->alloc((uint) (end - str) + 1)))
return (char*) ""; // Sql_alloc has set error flag {
dst->str= (char*) ""; // Sql_alloc has set error flag
dst->length= 0;
return true;
}
lip->m_cpp_text_start= lip->get_cpp_tok_start() + pre_skip; m_cpp_text_start= get_cpp_tok_start() + pre_skip;
lip->m_cpp_text_end= lip->get_cpp_ptr() - post_skip; m_cpp_text_end= get_cpp_ptr() - post_skip;
if (!found_escape) if (!found_escape)
{ {
lip->yytoklen=(uint) (end-str); memcpy(dst->str, str, dst->length= (end - str));
memcpy(start,str,lip->yytoklen); dst->str[dst->length]= 0;
start[lip->yytoklen]=0;
} }
else else
{ {
char *to; dst->length= unescape(cs, dst->str, str, end, sep);
for (to=start ; str != end ; str++)
{
#ifdef USE_MB
int l;
if (use_mb(cs) &&
(l = my_ismbchar(cs, str, end))) {
while (l--)
*to++ = *str++;
str--;
continue;
}
#endif
if (!(lip->m_thd->variables.sql_mode & MODE_NO_BACKSLASH_ESCAPES) &&
*str == '\\' && str+1 != end)
{
switch(*++str) {
case 'n':
*to++='\n';
break;
case 't':
*to++= '\t';
break;
case 'r':
*to++ = '\r';
break;
case 'b':
*to++ = '\b';
break;
case '0':
*to++= 0; // Ascii null
break;
case 'Z': // ^Z must be escaped on Win32
*to++='\032';
break;
case '_':
case '%':
*to++= '\\'; // remember prefix for wildcard
/* Fall through */
default:
*to++= *str;
break;
}
} }
else if (*str == sep) return false;
*to++= *str++; // Two ' or "
else
*to++ = *str;
}
*to=0;
lip->yytoklen=(uint) (to-start);
}
return start;
} }
} }
return 0; // unexpected end of query return true; // unexpected end of query
} }
...@@ -1122,12 +1138,11 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd) ...@@ -1122,12 +1138,11 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
} }
/* Found N'string' */ /* Found N'string' */
lip->yySkip(); // Skip ' lip->yySkip(); // Skip '
if (!(yylval->lex_str.str = get_text(lip, 2, 1))) if (lip->get_text(&yylval->lex_str, 2, 1))
{ {
state= MY_LEX_CHAR; // Read char by char state= MY_LEX_CHAR; // Read char by char
break; break;
} }
yylval->lex_str.length= lip->yytoklen;
lex->text_string_is_7bit= (lip->tok_bitmap & 0x80) ? 0 : 1; lex->text_string_is_7bit= (lip->tok_bitmap & 0x80) ? 0 : 1;
return(NCHAR_STRING); return(NCHAR_STRING);
...@@ -1488,12 +1503,11 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd) ...@@ -1488,12 +1503,11 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
} }
/* " used for strings */ /* " used for strings */
case MY_LEX_STRING: // Incomplete text string case MY_LEX_STRING: // Incomplete text string
if (!(yylval->lex_str.str = get_text(lip, 1, 1))) if (lip->get_text(&yylval->lex_str, 1, 1))
{ {
state= MY_LEX_CHAR; // Read char by char state= MY_LEX_CHAR; // Read char by char
break; break;
} }
yylval->lex_str.length=lip->yytoklen;
lip->body_utf8_append(lip->m_cpp_text_start); lip->body_utf8_append(lip->m_cpp_text_start);
......
...@@ -1804,6 +1804,8 @@ enum enum_comment_state ...@@ -1804,6 +1804,8 @@ enum enum_comment_state
class Lex_input_stream class Lex_input_stream
{ {
size_t unescape(CHARSET_INFO *cs, char *to,
const char *str, const char *end, int sep);
public: public:
Lex_input_stream() Lex_input_stream()
{ {
...@@ -2088,9 +2090,6 @@ public: ...@@ -2088,9 +2090,6 @@ public:
/** Current line number. */ /** Current line number. */
uint yylineno; uint yylineno;
/** Length of the last token parsed. */
uint yytoklen;
/** Interface with bison, value of the last token parsed. */ /** Interface with bison, value of the last token parsed. */
LEX_YYSTYPE yylval; LEX_YYSTYPE yylval;
...@@ -2105,6 +2104,7 @@ public: ...@@ -2105,6 +2104,7 @@ public:
/** LALR(2) resolution, value of the look ahead token.*/ /** LALR(2) resolution, value of the look ahead token.*/
LEX_YYSTYPE lookahead_yylval; LEX_YYSTYPE lookahead_yylval;
bool get_text(LEX_STRING *to, int pre_skip, int post_skip);
private: private:
/** Pointer to the current position in the raw input stream. */ /** Pointer to the current position in the raw input stream. */
char *m_ptr; char *m_ptr;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment