Commit 4a126bf3 authored by Alexander Barkov

Removing some duplicate code in THD::convert_string() & friends

1. Adding THD::convert_string(LEX_CSTRING *to,...) as a wrapper
   for convert_string(LEX_STRING *to,...), as LEX_CSTRING
   is now frequently used for conversion purposes.
   This reduced duplicate code in the TEXT_STRING_sys,
   TEXT_STRING_literal, TEXT_STRING_filesystem grammar rules in *.yy

2. Adding yet another THD::convert_string() with an extra parameter
   "bool simple_copy_is_possible". This further reduced the
   repetitive code in the mentioned grammar rules in *.yy

3. Deriving Lex_ident_cli_st from Lex_string_with_metadata_st,
   as they have very similar functionality. Moving m_quote
   from Lex_ident_cli_st to Lex_string_with_metadata_st,
   as m_quote will be used later to optimize string literals anyway
   (e.g. avoid redundant copying on the tokenizer stage).
   Adjusting Lex_input_stream::get_text() accordingly.

4. Moving the remainder of the code in the TEXT_STRING_sys, TEXT_STRING_literal,
   TEXT_STRING_filesystem grammar rules into new methods in THD:
   - make_text_string_sys()
   - make_text_string_connection()
   - make_text_string_filesystem()
   and changing *.yy to use these new methods.
   This reduced the amount of similar code in
   sql_yacc.yy and sql_yacc_ora.yy.

5. Removing duplicate code in Lex_input_stream::body_utf8_append_ident()
   by reusing THD::make_text_string_sys(). Thanks to #3 and #4.

6. Making THD members charset_is_system_charset,
   charset_is_collation_connection, charset_is_character_set_filesystem
   private, as they are not needed externally any more.
parent af682525
...@@ -3130,8 +3130,10 @@ class THD :public Statement, ...@@ -3130,8 +3130,10 @@ class THD :public Statement,
is set if a statement accesses a temporary table created through is set if a statement accesses a temporary table created through
CREATE TEMPORARY TABLE. CREATE TEMPORARY TABLE.
*/ */
private:
bool charset_is_system_charset, charset_is_collation_connection; bool charset_is_system_charset, charset_is_collation_connection;
bool charset_is_character_set_filesystem; bool charset_is_character_set_filesystem;
public:
bool enable_slow_log; /* Enable slow log for current statement */ bool enable_slow_log; /* Enable slow log for current statement */
bool abort_on_warning; bool abort_on_warning;
bool got_warning; /* Set on call to push_warning() */ bool got_warning; /* Set on call to push_warning() */
...@@ -3706,6 +3708,25 @@ class THD :public Statement, ...@@ -3706,6 +3708,25 @@ class THD :public Statement,
bool convert_string(LEX_STRING *to, CHARSET_INFO *to_cs, bool convert_string(LEX_STRING *to, CHARSET_INFO *to_cs,
const char *from, size_t from_length, const char *from, size_t from_length,
CHARSET_INFO *from_cs); CHARSET_INFO *from_cs);
/*
  LEX_CSTRING flavour of convert_string().
  Delegates to the LEX_STRING overload using a temporary, then copies the
  resulting pointer and length into *to. The copy is done unconditionally,
  whatever the delegated call returned.

  @param to           Destination; receives the temporary's str and length
  @param to_cs        Character set to convert to
  @param from         Source bytes
  @param from_length  Length of the source, in bytes
  @param from_cs      Character set of the source
  @return             The value returned by the LEX_STRING overload
*/
bool convert_string(LEX_CSTRING *to, CHARSET_INFO *to_cs,
                    const char *from, size_t from_length,
                    CHARSET_INFO *from_cs)
{
  LEX_STRING converted;
  const bool res= convert_string(&converted, to_cs,
                                 from, from_length, from_cs);
  to->length= converted.length;
  to->str= converted.str;
  return res;
}
/*
  Convert *from into *to, skipping the conversion when the caller says
  a plain copy is enough.

  @param to                       Destination string
  @param tocs                     Character set to convert to
  @param from                     Source string
  @param fromcs                   Character set of the source
  @param simple_copy_is_possible  If true, *from is assigned to *to as is
                                  (no conversion call is made)
  @return  false after a plain copy; otherwise the result of the
           converting overload
*/
bool convert_string(LEX_CSTRING *to, CHARSET_INFO *tocs,
                    const LEX_CSTRING *from, CHARSET_INFO *fromcs,
                    bool simple_copy_is_possible)
{
  if (simple_copy_is_possible)
  {
    // Shallow copy: the destination shares the pointer and length of *from
    *to= *from;
    return false;
  }
  return unlikely(convert_string(to, tocs, from->str, from->length, fromcs));
}
/* /*
Convert a strings between character sets. Convert a strings between character sets.
Uses my_convert_fix(), which uses an mb_wc .. mc_mb loop internally. Uses my_convert_fix(), which uses an mb_wc .. mc_mb loop internally.
...@@ -3767,6 +3788,24 @@ class THD :public Statement, ...@@ -3767,6 +3788,24 @@ class THD :public Statement,
Item_basic_constant *make_string_literal_nchar(const Lex_string_with_metadata_st &str); Item_basic_constant *make_string_literal_nchar(const Lex_string_with_metadata_st &str);
Item_basic_constant *make_string_literal_charset(const Lex_string_with_metadata_st &str, Item_basic_constant *make_string_literal_charset(const Lex_string_with_metadata_st &str,
CHARSET_INFO *cs); CHARSET_INFO *cs);
/*
  Build *to from *from with system_charset_info as the target character set.
  The charset_is_system_charset flag is passed to convert_string() as
  its "simple copy is possible" argument; charset() is the source
  character set.

  @return  The result of convert_string(); the grammar rules abort on true
*/
bool make_text_string_sys(LEX_CSTRING *to,
                          const Lex_string_with_metadata_st *from)
{
  const bool copy_as_is= charset_is_system_charset;
  return convert_string(to, system_charset_info, from, charset(), copy_as_is);
}
/*
  Build *to from *from with variables.collation_connection as the target
  character set.
  The charset_is_collation_connection flag is passed to convert_string()
  as its "simple copy is possible" argument; charset() is the source
  character set.

  @return  The result of convert_string(); the grammar rules abort on true
*/
bool make_text_string_connection(LEX_CSTRING *to,
                                 const Lex_string_with_metadata_st *from)
{
  const bool copy_as_is= charset_is_collation_connection;
  return convert_string(to, variables.collation_connection,
                        from, charset(), copy_as_is);
}
/*
  Build *to from *from with variables.character_set_filesystem as the
  target character set.
  The charset_is_character_set_filesystem flag is passed to
  convert_string() as its "simple copy is possible" argument; charset()
  is the source character set.

  @return  The result of convert_string(); the grammar rules abort on true
*/
bool make_text_string_filesystem(LEX_CSTRING *to,
                                 const Lex_string_with_metadata_st *from)
{
  const bool copy_as_is= charset_is_character_set_filesystem;
  return convert_string(to, variables.character_set_filesystem,
                        from, charset(), copy_as_is);
}
void add_changed_table(TABLE *table); void add_changed_table(TABLE *table);
void add_changed_table(const char *key, size_t key_length); void add_changed_table(const char *key, size_t key_length);
CHANGED_TABLE_LIST * changed_table_dup(const char *key, size_t key_length); CHANGED_TABLE_LIST * changed_table_dup(const char *key, size_t key_length);
......
...@@ -416,32 +416,18 @@ void Lex_input_stream::body_utf8_append(const char *ptr) ...@@ -416,32 +416,18 @@ void Lex_input_stream::body_utf8_append(const char *ptr)
operation. operation.
*/ */
void Lex_input_stream::body_utf8_append_ident(THD *thd, void
const LEX_CSTRING *txt, Lex_input_stream::body_utf8_append_ident(THD *thd,
const Lex_string_with_metadata_st *txt,
const char *end_ptr) const char *end_ptr)
{ {
if (!m_cpp_utf8_processed_ptr) if (!m_cpp_utf8_processed_ptr)
return; return;
LEX_CSTRING utf_txt; LEX_CSTRING utf_txt;
CHARSET_INFO *txt_cs= thd->charset(); thd->make_text_string_sys(&utf_txt, txt); // QQ: check return value?
if (!my_charset_same(txt_cs, &my_charset_utf8_general_ci))
{
LEX_STRING to;
thd->convert_string(&to,
&my_charset_utf8_general_ci,
txt->str, (uint) txt->length,
txt_cs);
utf_txt.str= to.str;
utf_txt.length= to.length;
}
else
utf_txt= *txt;
/* NOTE: utf_txt.length is in bytes, not in symbols. */ /* NOTE: utf_txt.length is in bytes, not in symbols. */
memcpy(m_body_utf8_ptr, utf_txt.str, utf_txt.length); memcpy(m_body_utf8_ptr, utf_txt.str, utf_txt.length);
m_body_utf8_ptr += utf_txt.length; m_body_utf8_ptr += utf_txt.length;
*m_body_utf8_ptr= 0; *m_body_utf8_ptr= 0;
...@@ -1043,13 +1029,13 @@ bool Lex_input_stream::get_text(Lex_string_with_metadata_st *dst, uint sep, ...@@ -1043,13 +1029,13 @@ bool Lex_input_stream::get_text(Lex_string_with_metadata_st *dst, uint sep,
uchar c; uchar c;
uint found_escape=0; uint found_escape=0;
CHARSET_INFO *cs= m_thd->charset(); CHARSET_INFO *cs= m_thd->charset();
bool is_8bit= false;
dst->set_8bit(false);
while (! eof()) while (! eof())
{ {
c= yyGet(); c= yyGet();
if (c & 0x80) if (c & 0x80)
dst->set_8bit(true); is_8bit= true;
#ifdef USE_MB #ifdef USE_MB
{ {
int l; int l;
...@@ -1093,23 +1079,24 @@ bool Lex_input_stream::get_text(Lex_string_with_metadata_st *dst, uint sep, ...@@ -1093,23 +1079,24 @@ bool Lex_input_stream::get_text(Lex_string_with_metadata_st *dst, uint sep,
if (!(to= (char*) m_thd->alloc((uint) (end - str) + 1))) if (!(to= (char*) m_thd->alloc((uint) (end - str) + 1)))
{ {
dst->str= ""; // Sql_alloc has set error flag dst->set(&empty_clex_str, 0, '\0');
dst->length= 0; return true; // Sql_alloc has set error flag
return true;
} }
dst->str= to;
m_cpp_text_start= m_cpp_tok_start + pre_skip; m_cpp_text_start= m_cpp_tok_start + pre_skip;
m_cpp_text_end= get_cpp_ptr() - post_skip; m_cpp_text_end= get_cpp_ptr() - post_skip;
if (!found_escape) if (!found_escape)
{ {
memcpy(to, str, dst->length= (end - str)); size_t len= (end - str);
to[dst->length]= 0; memcpy(to, str, len);
to[len]= '\0';
dst->set(to, len, is_8bit, '\0');
} }
else else
{ {
dst->length= unescape(cs, to, str, end, sep); size_t len= unescape(cs, to, str, end, sep);
dst->set(to, len, is_8bit, '\0');
} }
return false; return false;
} }
......
...@@ -37,12 +37,16 @@ ...@@ -37,12 +37,16 @@
/** /**
A string with metadata. A string with metadata. Usually points to a string in the client
character set, but unlike Lex_ident_cli_st (see below) it does not
necessarily point to a query fragment. It can also point to memory
of other kinds (e.g. an additional THD allocated memory buffer
not overlapping with the current query text).
We'll add more flags here eventually, to know if the string has, e.g.: We'll add more flags here eventually, to know if the string has, e.g.:
- multi-byte characters - multi-byte characters
- bad byte sequences - bad byte sequences
- backslash escapes: 'a\nb' - backslash escapes: 'a\nb'
- separator escapes: 'a''b'
and reuse the original query fragments instead of making the string and reuse the original query fragments instead of making the string
copy too early, in Lex_input_stream::get_text(). copy too early, in Lex_input_stream::get_text().
This will allow to avoid unnecessary copying, as well as This will allow to avoid unnecessary copying, as well as
...@@ -50,9 +54,30 @@ ...@@ -50,9 +54,30 @@
*/ */
struct Lex_string_with_metadata_st: public LEX_CSTRING struct Lex_string_with_metadata_st: public LEX_CSTRING
{ {
private:
bool m_is_8bit; // True if the string has 8bit characters bool m_is_8bit; // True if the string has 8bit characters
char m_quote; // Quote character, or 0 if not quoted
public: public:
void set_8bit(bool is_8bit) { m_is_8bit= is_8bit; } void set_8bit(bool is_8bit) { m_is_8bit= is_8bit; }
// Store both metadata fields at once; str and length are left untouched.
void set_metadata(bool is_8bit, char quote)
{
  m_quote= quote;       // quote character, '\0' meaning "not quoted"
  m_is_8bit= is_8bit;   // whether the string has 8bit characters
}
// Point this string at the buffer (s, len) and record its metadata.
// Only the pointer is stored; the bytes are not copied.
void set(const char *s, size_t len, bool is_8bit, char quote)
{
  set_metadata(is_8bit, quote);
  length= len;
  str= s;
}
void set(const LEX_CSTRING *s, bool is_8bit, char quote)
{
((LEX_CSTRING &)*this)= *s;
set_metadata(is_8bit, quote);
}
bool is_8bit() const { return m_is_8bit; }
bool is_quoted() const { return m_quote != '\0'; }
char quote() const { return m_quote; }
// Get string repertoire by the 8-bit flag and the character set // Get string repertoire by the 8-bit flag and the character set
uint repertoire(CHARSET_INFO *cs) const uint repertoire(CHARSET_INFO *cs) const
{ {
...@@ -71,44 +96,27 @@ struct Lex_string_with_metadata_st: public LEX_CSTRING ...@@ -71,44 +96,27 @@ struct Lex_string_with_metadata_st: public LEX_CSTRING
Used to store identifiers in the client character set. Used to store identifiers in the client character set.
Points to a query fragment. Points to a query fragment.
*/ */
struct Lex_ident_cli_st: public LEX_CSTRING struct Lex_ident_cli_st: public Lex_string_with_metadata_st
{ {
private:
bool m_is_8bit;
char m_quote;
public: public:
void set_keyword(const char *s, size_t len) void set_keyword(const char *s, size_t len)
{ {
str= s; set(s, len, false, '\0');
length= len;
m_is_8bit= false;
m_quote= '\0';
} }
void set_ident(const char *s, size_t len, bool is_8bit) void set_ident(const char *s, size_t len, bool is_8bit)
{ {
str= s; set(s, len, is_8bit, '\0');
length= len;
m_is_8bit= is_8bit;
m_quote= '\0';
} }
void set_ident_quoted(const char *s, size_t len, bool is_8bit, char quote) void set_ident_quoted(const char *s, size_t len, bool is_8bit, char quote)
{ {
str= s; set(s, len, is_8bit, quote);
length= len;
m_is_8bit= is_8bit;
m_quote= quote;
} }
void set_unquoted(const LEX_CSTRING *s, bool is_8bit) void set_unquoted(const LEX_CSTRING *s, bool is_8bit)
{ {
((LEX_CSTRING &)*this)= *s; set(s, is_8bit, '\0');
m_is_8bit= is_8bit;
m_quote= '\0';
} }
const char *pos() const { return str - is_quoted(); } const char *pos() const { return str - is_quoted(); }
const char *end() const { return str + length + is_quoted(); } const char *end() const { return str + length + is_quoted(); }
bool is_quoted() const { return m_quote != '\0'; }
bool is_8bit() const { return m_is_8bit; }
char quote() const { return m_quote; }
}; };
...@@ -2453,7 +2461,7 @@ class Lex_input_stream ...@@ -2453,7 +2461,7 @@ class Lex_input_stream
void body_utf8_append(const char *ptr); void body_utf8_append(const char *ptr);
void body_utf8_append(const char *ptr, const char *end_ptr); void body_utf8_append(const char *ptr, const char *end_ptr);
void body_utf8_append_ident(THD *thd, void body_utf8_append_ident(THD *thd,
const LEX_CSTRING *txt, const Lex_string_with_metadata_st *txt,
const char *end_ptr); const char *end_ptr);
void body_utf8_append_escape(THD *thd, void body_utf8_append_escape(THD *thd,
const LEX_CSTRING *txt, const LEX_CSTRING *txt,
......
...@@ -15170,57 +15170,26 @@ IDENT_sys: ...@@ -15170,57 +15170,26 @@ IDENT_sys:
TEXT_STRING_sys: TEXT_STRING_sys:
TEXT_STRING TEXT_STRING
{ {
if (thd->charset_is_system_charset) if (thd->make_text_string_sys(&$$, &$1))
$$= $1;
else
{
LEX_STRING to;
if (unlikely(thd->convert_string(&to, system_charset_info,
$1.str, $1.length,
thd->charset())))
MYSQL_YYABORT; MYSQL_YYABORT;
$$.str= to.str;
$$.length= to.length;
}
} }
; ;
TEXT_STRING_literal: TEXT_STRING_literal:
TEXT_STRING TEXT_STRING
{ {
if (thd->charset_is_collation_connection) if (thd->make_text_string_connection(&$$, &$1))
$$= $1;
else
{
LEX_STRING to;
if (unlikely(thd->convert_string(&to,
thd->variables.collation_connection,
$1.str, $1.length,
thd->charset())))
MYSQL_YYABORT; MYSQL_YYABORT;
$$.str= to.str;
$$.length= to.length;
}
} }
; ;
TEXT_STRING_filesystem: TEXT_STRING_filesystem:
TEXT_STRING TEXT_STRING
{ {
if (thd->charset_is_character_set_filesystem) if (thd->make_text_string_filesystem(&$$, &$1))
$$= $1;
else
{
LEX_STRING to;
if (unlikely(thd->convert_string(&to,
thd->variables.character_set_filesystem,
$1.str, $1.length,
thd->charset())))
MYSQL_YYABORT; MYSQL_YYABORT;
$$.str= to.str;
$$.length= to.length;
}
} }
;
ident_table_alias: ident_table_alias:
IDENT_sys IDENT_sys
......
...@@ -14919,56 +14919,24 @@ IDENT_sys: ...@@ -14919,56 +14919,24 @@ IDENT_sys:
TEXT_STRING_sys: TEXT_STRING_sys:
TEXT_STRING TEXT_STRING
{ {
if (thd->charset_is_system_charset) if (thd->make_text_string_sys(&$$, &$1))
$$= $1;
else
{
LEX_STRING to;
if (unlikely(thd->convert_string(&to, system_charset_info,
$1.str, $1.length,
thd->charset())))
MYSQL_YYABORT; MYSQL_YYABORT;
$$.str= to.str;
$$.length= to.length;
}
} }
; ;
TEXT_STRING_literal: TEXT_STRING_literal:
TEXT_STRING TEXT_STRING
{ {
if (thd->charset_is_collation_connection) if (thd->make_text_string_connection(&$$, &$1))
$$= $1;
else
{
LEX_STRING to;
if (unlikely(thd->convert_string(&to,
thd->variables.collation_connection,
$1.str, $1.length,
thd->charset())))
MYSQL_YYABORT; MYSQL_YYABORT;
$$.str= to.str;
$$.length= to.length;
}
} }
; ;
TEXT_STRING_filesystem: TEXT_STRING_filesystem:
TEXT_STRING TEXT_STRING
{ {
if (thd->charset_is_character_set_filesystem) if (thd->make_text_string_filesystem(&$$, &$1))
$$= $1;
else
{
LEX_STRING to;
if (unlikely(thd->convert_string(&to,
thd->variables.character_set_filesystem,
$1.str, $1.length,
thd->charset())))
MYSQL_YYABORT; MYSQL_YYABORT;
$$.str= to.str;
$$.length= to.length;
}
} }
; ;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment