Commit 1427e1db authored by Alexander Barkov

MDEV-6661 PI() does not work well in UCS2/UTF16/UTF32 context

MDEV-6666 Malformed result for CONCAT(utf8_column, binary_string)

Item_static_string_func::safe_charset_converter() and
Item_hex_string::safe_charset_converter() did not
handle character sets with mbminlen>1 properly, nor did they
handle conversion from binary to multi-byte character sets well.

Introducing Item::const_charset_converter(), to reuse it in a number
of Item_*::safe_charset_converter().
parent 18b307a7
...@@ -5323,3 +5323,15 @@ DROP TABLE t1; ...@@ -5323,3 +5323,15 @@ DROP TABLE t1;
# #
# End of 5.6 tests # End of 5.6 tests
# #
#
# Start of 10.0 tests
#
#
# MDEV-6661 PI() does not work well in UCS2/UTF16/UTF32 context
#
SELECT CONCAT(CONVERT('pi=' USING ucs2),PI()) AS PI;
PI
pi=3.141593
#
# End of 10.0 tests
#
...@@ -2078,3 +2078,45 @@ DFFFFFDFFFFF9CFFFF9DFFFF9EFFFF ...@@ -2078,3 +2078,45 @@ DFFFFFDFFFFF9CFFFF9DFFFF9EFFFF
# #
# End of 5.6 tests # End of 5.6 tests
# #
#
# Start of 10.0 tests
#
#
# MDEV-6661 PI() does not work well in UCS2/UTF16/UTF32 context
#
SELECT CONCAT(CONVERT('pi=' USING utf16),PI()) AS PI;
PI
pi=3.141593
#
# MDEV-6666 Malformed result for CONCAT(utf8_column, binary_string)
#
SET NAMES utf8mb4;
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf16);
INSERT INTO t1 VALUES ('a');
SELECT CONCAT(a,0xD800) FROM t1;
ERROR HY000: Invalid utf16 character string: 'D800'
SELECT CONCAT(a,0xD800DC00) FROM t1;
CONCAT(a,0xD800DC00)
a𐀀
SELECT CONCAT(a,0x00FF) FROM t1;
CONCAT(a,0x00FF)
aÿ
DROP TABLE t1;
SELECT CONCAT(_utf16'a' COLLATE utf16_unicode_ci, _binary 0xD800);
ERROR HY000: Invalid utf16 character string: 'D800'
PREPARE stmt FROM "SELECT CONCAT(_utf16'a' COLLATE utf16_unicode_ci, ?)";
SET @arg00=_binary 0xD800;
EXECUTE stmt USING @arg00;
ERROR HY000: Invalid utf16 character string: 'D800'
SET @arg00=_binary 0xD800DC00;
EXECUTE stmt USING @arg00;
CONCAT(_utf16'a' COLLATE utf16_unicode_ci, ?)
a𐀀
SET @arg00=_binary 0x00FF;
EXECUTE stmt USING @arg00;
CONCAT(_utf16'a' COLLATE utf16_unicode_ci, ?)
aÿ
DEALLOCATE PREPARE stmt;
#
# End of 10.0 tests
#
...@@ -2164,3 +2164,45 @@ DFFFFFDFFFFF9CFFFF9DFFFF9EFFFF ...@@ -2164,3 +2164,45 @@ DFFFFFDFFFFF9CFFFF9DFFFF9EFFFF
# #
# End of 5.6 tests # End of 5.6 tests
# #
#
# Start of 10.0 tests
#
#
# MDEV-6661 PI() does not work well in UCS2/UTF16/UTF32 context
#
SELECT CONCAT(CONVERT('pi=' USING utf32),PI()) AS PI;
PI
pi=3.141593
#
# MDEV-6666 Malformed result for CONCAT(utf8_column, binary_string)
#
SET NAMES utf8mb4;
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf32);
INSERT INTO t1 VALUES ('a');
SELECT CONCAT(a,0x20FFFF) FROM t1;
ERROR HY000: Invalid utf32 character string: '0020FF'
SELECT CONCAT(a,0x010000) FROM t1;
CONCAT(a,0x010000)
a𐀀
SELECT CONCAT(a,0x00FF) FROM t1;
CONCAT(a,0x00FF)
aÿ
DROP TABLE t1;
SELECT CONCAT(_utf32'a' COLLATE utf32_unicode_ci, _binary 0x20FFFF);
ERROR HY000: Invalid utf32 character string: '0020FF'
PREPARE stmt FROM "SELECT CONCAT(_utf32'a' COLLATE utf32_unicode_ci, ?)";
SET @arg00=_binary 0x20FFFF;
EXECUTE stmt USING @arg00;
ERROR HY000: Invalid utf32 character string: '0020FF'
SET @arg00=_binary 0x010000;
EXECUTE stmt USING @arg00;
CONCAT(_utf32'a' COLLATE utf32_unicode_ci, ?)
a𐀀
SET @arg00=_binary 0x00FF;
EXECUTE stmt USING @arg00;
CONCAT(_utf32'a' COLLATE utf32_unicode_ci, ?)
aÿ
DEALLOCATE PREPARE stmt;
#
# End of 10.0 tests
#
...@@ -5933,3 +5933,27 @@ set max_sort_length=default; ...@@ -5933,3 +5933,27 @@ set max_sort_length=default;
# #
# End of 5.6 tests # End of 5.6 tests
# #
#
# Start of 10.0 tests
#
#
# MDEV-6666 Malformed result for CONCAT(utf8_column, binary_string)
#
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf8);
INSERT INTO t1 VALUES ('a');
SELECT CONCAT(a,0xFF) FROM t1;
ERROR HY000: Invalid utf8 character string: 'FF'
SELECT CONCAT(a,0xC3BF) FROM t1;
CONCAT(a,0xC3BF)
aÿ
DROP TABLE t1;
SELECT CONCAT('a' COLLATE utf8_unicode_ci, _binary 0xFF);
ERROR HY000: Invalid utf8 character string: 'FF'
PREPARE stmt FROM "SELECT CONCAT('a' COLLATE utf8_unicode_ci, ?)";
SET @arg00=_binary 0xFF;
EXECUTE stmt USING @arg00;
ERROR HY000: Invalid utf8 character string: 'FF'
DEALLOCATE PREPARE stmt;
#
# End of 10.0 tests
#
...@@ -891,3 +891,17 @@ DROP TABLE t1; ...@@ -891,3 +891,17 @@ DROP TABLE t1;
--echo # --echo #
--echo # End of 5.6 tests --echo # End of 5.6 tests
--echo # --echo #
--echo #
--echo # Start of 10.0 tests
--echo #
--echo #
--echo # MDEV-6661 PI() does not work well in UCS2/UTF16/UTF32 context
--echo #
SELECT CONCAT(CONVERT('pi=' USING ucs2),PI()) AS PI;
--echo #
--echo # End of 10.0 tests
--echo #
...@@ -816,3 +816,39 @@ set collation_connection=utf16_bin; ...@@ -816,3 +816,39 @@ set collation_connection=utf16_bin;
--echo # End of 5.6 tests --echo # End of 5.6 tests
--echo # --echo #
--echo #
--echo # Start of 10.0 tests
--echo #
--echo #
--echo # MDEV-6661 PI() does not work well in UCS2/UTF16/UTF32 context
--echo #
SELECT CONCAT(CONVERT('pi=' USING utf16),PI()) AS PI;
--echo #
--echo # MDEV-6666 Malformed result for CONCAT(utf8_column, binary_string)
--echo #
SET NAMES utf8mb4;
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf16);
INSERT INTO t1 VALUES ('a');
--error ER_INVALID_CHARACTER_STRING
SELECT CONCAT(a,0xD800) FROM t1;
SELECT CONCAT(a,0xD800DC00) FROM t1;
SELECT CONCAT(a,0x00FF) FROM t1;
DROP TABLE t1;
--error ER_INVALID_CHARACTER_STRING
SELECT CONCAT(_utf16'a' COLLATE utf16_unicode_ci, _binary 0xD800);
PREPARE stmt FROM "SELECT CONCAT(_utf16'a' COLLATE utf16_unicode_ci, ?)";
SET @arg00=_binary 0xD800;
--error ER_INVALID_CHARACTER_STRING
EXECUTE stmt USING @arg00;
SET @arg00=_binary 0xD800DC00;
EXECUTE stmt USING @arg00;
SET @arg00=_binary 0x00FF;
EXECUTE stmt USING @arg00;
DEALLOCATE PREPARE stmt;
--echo #
--echo # End of 10.0 tests
--echo #
-- source include/have_utf32.inc -- source include/have_utf32.inc
-- source include/have_utf8mb4.inc
SET TIME_ZONE = '+03:00'; SET TIME_ZONE = '+03:00';
...@@ -918,3 +919,40 @@ set collation_connection=utf32_bin; ...@@ -918,3 +919,40 @@ set collation_connection=utf32_bin;
--echo # --echo #
--echo #
--echo # Start of 10.0 tests
--echo #
--echo #
--echo # MDEV-6661 PI() does not work well in UCS2/UTF16/UTF32 context
--echo #
SELECT CONCAT(CONVERT('pi=' USING utf32),PI()) AS PI;
--echo #
--echo # MDEV-6666 Malformed result for CONCAT(utf8_column, binary_string)
--echo #
SET NAMES utf8mb4;
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf32);
INSERT INTO t1 VALUES ('a');
--error ER_INVALID_CHARACTER_STRING
SELECT CONCAT(a,0x20FFFF) FROM t1;
SELECT CONCAT(a,0x010000) FROM t1;
SELECT CONCAT(a,0x00FF) FROM t1;
DROP TABLE t1;
--error ER_INVALID_CHARACTER_STRING
SELECT CONCAT(_utf32'a' COLLATE utf32_unicode_ci, _binary 0x20FFFF);
PREPARE stmt FROM "SELECT CONCAT(_utf32'a' COLLATE utf32_unicode_ci, ?)";
SET @arg00=_binary 0x20FFFF;
--error ER_INVALID_CHARACTER_STRING
EXECUTE stmt USING @arg00;
SET @arg00=_binary 0x010000;
EXECUTE stmt USING @arg00;
SET @arg00=_binary 0x00FF;
EXECUTE stmt USING @arg00;
DEALLOCATE PREPARE stmt;
--echo #
--echo # End of 10.0 tests
--echo #
...@@ -1654,3 +1654,28 @@ set max_sort_length=default; ...@@ -1654,3 +1654,28 @@ set max_sort_length=default;
--echo # End of 5.6 tests --echo # End of 5.6 tests
--echo # --echo #
--echo #
--echo # Start of 10.0 tests
--echo #
--echo #
--echo # MDEV-6666 Malformed result for CONCAT(utf8_column, binary_string)
--echo #
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf8);
INSERT INTO t1 VALUES ('a');
--error ER_INVALID_CHARACTER_STRING
SELECT CONCAT(a,0xFF) FROM t1;
SELECT CONCAT(a,0xC3BF) FROM t1;
DROP TABLE t1;
--error ER_INVALID_CHARACTER_STRING
SELECT CONCAT('a' COLLATE utf8_unicode_ci, _binary 0xFF);
PREPARE stmt FROM "SELECT CONCAT('a' COLLATE utf8_unicode_ci, ?)";
SET @arg00=_binary 0xFF;
--error ER_INVALID_CHARACTER_STRING
EXECUTE stmt USING @arg00;
DEALLOCATE PREPARE stmt;
--echo #
--echo # End of 10.0 tests
--echo #
...@@ -1166,6 +1166,8 @@ bool Item::eq(const Item *item, bool binary_cmp) const ...@@ -1166,6 +1166,8 @@ bool Item::eq(const Item *item, bool binary_cmp) const
Item *Item::safe_charset_converter(CHARSET_INFO *tocs) Item *Item::safe_charset_converter(CHARSET_INFO *tocs)
{ {
if (!needs_charset_converter(tocs))
return this;
Item_func_conv_charset *conv= new Item_func_conv_charset(this, tocs, 1); Item_func_conv_charset *conv= new Item_func_conv_charset(this, tocs, 1);
return conv->safe ? conv : NULL; return conv->safe ? conv : NULL;
} }
...@@ -1192,77 +1194,55 @@ Item *Item_num::safe_charset_converter(CHARSET_INFO *tocs) ...@@ -1192,77 +1194,55 @@ Item *Item_num::safe_charset_converter(CHARSET_INFO *tocs)
if (!(tocs->state & MY_CS_NONASCII)) if (!(tocs->state & MY_CS_NONASCII))
return this; return this;
Item_string *conv; Item *conv;
uint conv_errors; if ((conv= const_charset_converter(tocs, true)))
char buf[64], buf2[64]; conv->fix_char_length(max_char_length());
String tmp(buf, sizeof(buf), &my_charset_bin);
String cstr(buf2, sizeof(buf2), &my_charset_bin);
String *ostr= val_str(&tmp);
char *ptr;
cstr.copy(ostr->ptr(), ostr->length(), ostr->charset(), tocs, &conv_errors);
if (conv_errors || !(conv= new Item_string(cstr.ptr(), cstr.length(),
cstr.charset(),
collation.derivation)))
{
/*
Safe conversion is not possible (or EOM).
We could not convert a string into the requested character set
without data loss. The target charset does not cover all the
characters from the string. Operation cannot be done correctly.
*/
return NULL;
}
if (!(ptr= current_thd->strmake(cstr.ptr(), cstr.length())))
return NULL;
conv->str_value.set(ptr, cstr.length(), cstr.charset());
/* Ensure that no one is going to change the result string */
conv->str_value.mark_as_const();
conv->fix_char_length(max_char_length());
return conv;
}
Item *Item_static_float_func::safe_charset_converter(CHARSET_INFO *tocs)
{
Item_string *conv;
char buf[64];
String *s, tmp(buf, sizeof(buf), &my_charset_bin);
s= val_str(&tmp);
if ((conv= new Item_static_string_func(func_name, s->ptr(), s->length(),
s->charset())))
{
conv->str_value.copy();
conv->str_value.mark_as_const();
}
return conv; return conv;
} }
Item *Item_string::safe_charset_converter(CHARSET_INFO *tocs)
{
return charset_converter(tocs, true);
}
/** /**
Convert a string item into the requested character set. Create character set converter for constant items
using Item_null, Item_string or Item_static_string_func.
@param tocs Character set to convert the string to. @param tocs Character set to convert the string to.
@param lossless Whether data loss is acceptable. @param lossless Whether data loss is acceptable.
@param func_name Function name, or NULL.
@return A new item representing the converted string.
@return this, if conversion is not needed,
NULL, if safe conversion is not possible, or
a new item representing the converted constant.
*/ */
Item *Item_string::charset_converter(CHARSET_INFO *tocs, bool lossless) Item *Item::const_charset_converter(CHARSET_INFO *tocs,
bool lossless,
const char *func_name)
{ {
Item_string *conv; DBUG_ASSERT(const_item());
DBUG_ASSERT(fixed);
StringBuffer<64>tmp;
String *s= val_str(&tmp);
if (!s)
return new Item_null((char *) func_name, tocs);
if (!needs_charset_converter(s->length(), tocs))
{
if (collation.collation == &my_charset_bin && tocs != &my_charset_bin &&
!this->check_well_formed_result(s, true))
return NULL;
return this;
}
uint conv_errors; uint conv_errors;
char *ptr; Item_string *conv= func_name ?
String tmp, cstr, *ostr= val_str(&tmp); new Item_static_string_func(func_name,
cstr.copy(ostr->ptr(), ostr->length(), ostr->charset(), tocs, &conv_errors); s, tocs, &conv_errors,
conv_errors= lossless && conv_errors; collation.derivation,
if (conv_errors || !(conv= new Item_string(cstr.ptr(), cstr.length(), collation.repertoire) :
cstr.charset(), new Item_string(s, tocs, &conv_errors,
collation.derivation))) collation.derivation,
collation.repertoire);
if (!conv || (conv_errors && lossless))
{ {
/* /*
Safe conversion is not possible (or EOM). Safe conversion is not possible (or EOM).
...@@ -1272,56 +1252,44 @@ Item *Item_string::charset_converter(CHARSET_INFO *tocs, bool lossless) ...@@ -1272,56 +1252,44 @@ Item *Item_string::charset_converter(CHARSET_INFO *tocs, bool lossless)
*/ */
return NULL; return NULL;
} }
if (!(ptr= current_thd->strmake(cstr.ptr(), cstr.length()))) if (s->charset() == &my_charset_bin && tocs != &my_charset_bin &&
!conv->check_well_formed_result(true))
return NULL; return NULL;
conv->str_value.set(ptr, cstr.length(), cstr.charset());
/* Ensure that no one is going to change the result string */
conv->str_value.mark_as_const();
return conv; return conv;
} }
Item *Item_param::safe_charset_converter(CHARSET_INFO *tocs) Item *Item_param::safe_charset_converter(CHARSET_INFO *tocs)
{ {
/*
Return "this" if in prepare. result_type may change at execution time,
so it's possible that the converter will not be needed at all:
PREPARE stmt FROM 'SELECT * FROM t1 WHERE field = ?';
SET @arg= 1;
EXECUTE stmt USING @arg;
result_type is STRING_RESULT at prepare time,
and INT_RESULT at execution time.
*/
if (const_item()) if (const_item())
{ {
uint cnv_errors; uint cnv_errors;
String *ostr= val_str(&cnvstr); String *ostr= val_str(&cnvstr);
if (!needs_charset_converter(tocs))
return this;
cnvitem->str_value.copy(ostr->ptr(), ostr->length(), cnvitem->str_value.copy(ostr->ptr(), ostr->length(),
ostr->charset(), tocs, &cnv_errors); ostr->charset(), tocs, &cnv_errors);
if (cnv_errors) if (cnv_errors)
return NULL; return NULL;
if (ostr->charset() == &my_charset_bin && tocs != &my_charset_bin &&
!cnvitem->check_well_formed_result(&cnvitem->str_value, true))
return NULL;
cnvitem->str_value.mark_as_const(); cnvitem->str_value.mark_as_const();
cnvitem->max_length= cnvitem->str_value.numchars() * tocs->mbmaxlen; cnvitem->max_length= cnvitem->str_value.numchars() * tocs->mbmaxlen;
return cnvitem; return cnvitem;
} }
return Item::safe_charset_converter(tocs); return this;
}
Item *Item_static_string_func::safe_charset_converter(CHARSET_INFO *tocs)
{
Item_string *conv;
uint conv_errors;
String tmp, cstr, *ostr= val_str(&tmp);
cstr.copy(ostr->ptr(), ostr->length(), ostr->charset(), tocs, &conv_errors);
if (conv_errors ||
!(conv= new Item_static_string_func(func_name,
cstr.ptr(), cstr.length(),
cstr.charset(),
collation.derivation)))
{
/*
Safe conversion is not possible (or EOM).
We could not convert a string into the requested character set
without data loss. The target charset does not cover all the
characters from the string. Operation cannot be done correctly.
*/
return NULL;
}
conv->str_value.copy();
/* Ensure that no one is going to change the result string */
conv->str_value.mark_as_const();
return conv;
} }
...@@ -2203,33 +2171,10 @@ bool agg_item_set_converter(DTCollation &coll, const char *fname, ...@@ -2203,33 +2171,10 @@ bool agg_item_set_converter(DTCollation &coll, const char *fname,
for (i= 0, arg= args; i < nargs; i++, arg+= item_sep) for (i= 0, arg= args; i < nargs; i++, arg+= item_sep)
{ {
Item* conv; Item* conv= (*arg)->safe_charset_converter(coll.collation);
uint32 dummy_offset; if (conv == *arg)
if (!String::needs_conversion(1, (*arg)->collation.collation,
coll.collation,
&dummy_offset))
continue; continue;
if (!conv && ((*arg)->collation.repertoire == MY_REPERTOIRE_ASCII))
/*
No needs to add converter if an "arg" is NUMERIC or DATETIME
value (which is pure ASCII) and at the same time target DTCollation
is ASCII-compatible. For example, no needs to rewrite:
SELECT * FROM t1 WHERE datetime_field = '2010-01-01';
to
SELECT * FROM t1 WHERE CONVERT(datetime_field USING cs) = '2010-01-01';
TODO: avoid conversion of any values with
repertoire ASCII and 7bit-ASCII-compatible,
not only numeric/datetime origin.
*/
if ((*arg)->collation.derivation == DERIVATION_NUMERIC &&
(*arg)->collation.repertoire == MY_REPERTOIRE_ASCII &&
!((*arg)->collation.collation->state & MY_CS_NONASCII) &&
!(coll.collation->state & MY_CS_NONASCII))
continue;
if (!(conv= (*arg)->safe_charset_converter(coll.collation)) &&
((*arg)->collation.repertoire == MY_REPERTOIRE_ASCII))
conv= new Item_func_conv_charset(*arg, coll.collation, 1); conv= new Item_func_conv_charset(*arg, coll.collation, 1);
if (!conv) if (!conv)
...@@ -3015,7 +2960,7 @@ String *Item_float::val_str(String *str) ...@@ -3015,7 +2960,7 @@ String *Item_float::val_str(String *str)
{ {
// following assert is redundant, because fixed=1 assigned in constructor // following assert is redundant, because fixed=1 assigned in constructor
DBUG_ASSERT(fixed == 1); DBUG_ASSERT(fixed == 1);
str->set_real(value,decimals,&my_charset_bin); str->set_real(value, decimals, &my_charset_numeric);
return str; return str;
} }
...@@ -5375,13 +5320,6 @@ bool Item_field::vcol_in_partition_func_processor(uchar *int_arg) ...@@ -5375,13 +5320,6 @@ bool Item_field::vcol_in_partition_func_processor(uchar *int_arg)
} }
Item *Item_field::safe_charset_converter(CHARSET_INFO *tocs)
{
no_const_subst= 1;
return Item::safe_charset_converter(tocs);
}
void Item_field::cleanup() void Item_field::cleanup()
{ {
DBUG_ENTER("Item_field::cleanup"); DBUG_ENTER("Item_field::cleanup");
...@@ -5687,10 +5625,7 @@ String *Item::check_well_formed_result(String *str, bool send_error) ...@@ -5687,10 +5625,7 @@ String *Item::check_well_formed_result(String *str, bool send_error)
{ {
/* Check whether we got a well-formed string */ /* Check whether we got a well-formed string */
CHARSET_INFO *cs= str->charset(); CHARSET_INFO *cs= str->charset();
int well_formed_error; uint wlen= str->well_formed_length();
uint wlen= cs->cset->well_formed_len(cs,
str->ptr(), str->ptr() + str->length(),
str->length(), &well_formed_error);
if (wlen < str->length()) if (wlen < str->length())
{ {
THD *thd= current_thd; THD *thd= current_thd;
...@@ -6441,19 +6376,6 @@ bool Item_hex_constant::eq(const Item *arg, bool binary_cmp) const ...@@ -6441,19 +6376,6 @@ bool Item_hex_constant::eq(const Item *arg, bool binary_cmp) const
} }
Item *Item_hex_constant::safe_charset_converter(CHARSET_INFO *tocs)
{
Item_string *conv;
String tmp, *str= val_str(&tmp);
if (!(conv= new Item_string(str->ptr(), str->length(), tocs)))
return NULL;
conv->str_value.copy();
conv->str_value.mark_as_const();
return conv;
}
/* /*
bin item. bin item.
In string context this is a binary string. In string context this is a binary string.
......
...@@ -1463,6 +1463,48 @@ public: ...@@ -1463,6 +1463,48 @@ public:
virtual Item *expr_cache_insert_transformer(uchar *thd_arg) { return this; } virtual Item *expr_cache_insert_transformer(uchar *thd_arg) { return this; }
virtual bool expr_cache_is_needed(THD *) { return FALSE; } virtual bool expr_cache_is_needed(THD *) { return FALSE; }
virtual Item *safe_charset_converter(CHARSET_INFO *tocs); virtual Item *safe_charset_converter(CHARSET_INFO *tocs);
bool needs_charset_converter(uint32 length, CHARSET_INFO *tocs)
{
  /*
    Tell whether a value of this item, "length" bytes long, must be
    passed through a character set converter to be used in "tocs".

    The storage-level check comes first. It reports a conversion when going:
    - between two different non-binary character sets
    - from "binary" to an "unsafe" character set
      (one that can have non-well-formed strings)
    - from "binary" to a UCS2-alike character set with mbminlen>1,
      when prefix left-padding is needed for an incomplete character:
      binary 0xFF -> ucs2 0x00FF
  */
  const bool storage_conversion=
    String::needs_conversion_on_storage(length, collation.collation, tocs);
  if (!storage_conversion)
    return false;

  /*
    There is no need to add a converter if this item is a NUMERIC or
    DATETIME value (which is pure ASCII) and at the same time both the
    source and the target character sets are ASCII-compatible.
    For example, there is no need to rewrite:
      SELECT * FROM t1 WHERE datetime_field = '2010-01-01';
    to
      SELECT * FROM t1 WHERE CONVERT(datetime_field USING cs) = '2010-01-01';

    TODO: avoid conversion of any values with
    repertoire ASCII and 7bit-ASCII-compatible,
    not only numeric/datetime origin.
  */
  const bool pure_ascii_number=
    collation.derivation == DERIVATION_NUMERIC &&
    collation.repertoire == MY_REPERTOIRE_ASCII;
  const bool ascii_compatible_charsets=
    !(collation.collation->state & MY_CS_NONASCII) &&
    !(tocs->state & MY_CS_NONASCII);

  return !(pure_ascii_number && ascii_compatible_charsets);
}
bool needs_charset_converter(CHARSET_INFO *tocs)
{
  /*
    Length-agnostic variant: a length of 1 is passed on purpose,
    to force conversion if tocs->mbminlen>1.
  */
  const uint32 forcing_length= 1;
  return needs_charset_converter(forcing_length, tocs);
}
/*
Create a character set converter for a constant item.
@param tocs       Character set to convert the constant to.
@param lossless   When true, a conversion that reports errors is rejected.
@param func_name  Function name, or NULL.
@return this, if conversion is not needed,
NULL, if safe conversion is not possible, or
a new item representing the converted constant.
*/
Item *const_charset_converter(CHARSET_INFO *tocs, bool lossless,
const char *func_name);
/* Convenience overload: calls the three-argument form with func_name= NULL. */
Item *const_charset_converter(CHARSET_INFO *tocs, bool lossless)
{ return const_charset_converter(tocs, lossless, NULL); }
void delete_self() void delete_self()
{ {
cleanup(); cleanup();
...@@ -2189,7 +2231,6 @@ public: ...@@ -2189,7 +2231,6 @@ public:
Item *replace_equal_field(uchar *arg); Item *replace_equal_field(uchar *arg);
inline uint32 max_disp_length() { return field->max_display_length(); } inline uint32 max_disp_length() { return field->max_display_length(); }
Item_field *field_for_view_update() { return this; } Item_field *field_for_view_update() { return this; }
Item *safe_charset_converter(CHARSET_INFO *tocs);
int fix_outer_field(THD *thd, Field **field, Item **reference); int fix_outer_field(THD *thd, Field **field, Item **reference);
virtual Item *update_value_transformer(uchar *select_arg); virtual Item *update_value_transformer(uchar *select_arg);
virtual void print(String *str, enum_query_type query_type); virtual void print(String *str, enum_query_type query_type);
...@@ -2213,13 +2254,13 @@ public: ...@@ -2213,13 +2254,13 @@ public:
class Item_null :public Item_basic_constant class Item_null :public Item_basic_constant
{ {
public: public:
Item_null(char *name_par=0) Item_null(char *name_par=0, CHARSET_INFO *cs= &my_charset_bin)
{ {
maybe_null= null_value= TRUE; maybe_null= null_value= TRUE;
max_length= 0; max_length= 0;
name= name_par ? name_par : (char*) "NULL"; name= name_par ? name_par : (char*) "NULL";
fixed= 1; fixed= 1;
collation.set(&my_charset_bin, DERIVATION_IGNORABLE); collation.set(cs, DERIVATION_IGNORABLE);
} }
enum Type type() const { return NULL_ITEM; } enum Type type() const { return NULL_ITEM; }
bool eq(const Item *item, bool binary_cmp) const; bool eq(const Item *item, bool binary_cmp) const;
...@@ -2594,7 +2635,10 @@ public: ...@@ -2594,7 +2635,10 @@ public:
str->append(func_name); str->append(func_name);
} }
Item *safe_charset_converter(CHARSET_INFO *tocs); Item *safe_charset_converter(CHARSET_INFO *tocs)
{
return const_charset_converter(tocs, true, func_name);
}
}; };
...@@ -2621,6 +2665,19 @@ public: ...@@ -2621,6 +2665,19 @@ public:
// it is constant => can be used without fix_fields (and frequently used) // it is constant => can be used without fix_fields (and frequently used)
fixed= 1; fixed= 1;
} }
/*
Build a string item whose value is "str" copied into character set "tocs".
@param str          Source string.
@param tocs         Character set of the new item.
@param conv_errors  Passed through to String::copy(), which reports
conversion errors in it.
@param dv           Derivation to set on the item's collation.
@param repertoire   Repertoire to set on the item's collation.
*/
Item_string(const String *str, CHARSET_INFO *tocs, uint *conv_errors,
Derivation dv, uint repertoire)
:m_cs_specified(false)
{
/* If copy() reports failure, fall back to an empty string in "tocs". */
if (str_value.copy(str, tocs, conv_errors))
str_value.set("", 0, tocs); // EOM ?
/* Ensure that no one is going to change the result string */
str_value.mark_as_const();
collation.set(tocs, dv, repertoire);
/* Character length and item name are both derived from the copied value. */
fix_char_length(str_value.numchars());
set_name(str_value.ptr(), str_value.length(), tocs);
decimals= NOT_FIXED_DEC;
/* Mark the item as fixed right in the constructor. */
fixed= 1;
}
/* Just create an item and do not fill string representation */ /* Just create an item and do not fill string representation */
Item_string(CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE) Item_string(CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE)
: m_cs_specified(FALSE) : m_cs_specified(FALSE)
...@@ -2678,8 +2735,10 @@ public: ...@@ -2678,8 +2735,10 @@ public:
return new Item_string(name, str_value.ptr(), return new Item_string(name, str_value.ptr(),
str_value.length(), collation.collation); str_value.length(), collation.collation);
} }
Item *safe_charset_converter(CHARSET_INFO *tocs); Item *safe_charset_converter(CHARSET_INFO *tocs)
Item *charset_converter(CHARSET_INFO *tocs, bool lossless); {
return const_charset_converter(tocs, true);
}
inline void append(char *str, uint length) inline void append(char *str, uint length)
{ {
str_value.append(str, length); str_value.append(str, length);
...@@ -2728,6 +2787,9 @@ public: ...@@ -2728,6 +2787,9 @@ public:
m_cs_specified= cs_specified; m_cs_specified= cs_specified;
} }
String *check_well_formed_result(bool send_error)
{
  /* Run the generic Item check against this item's own string value. */
  String *own_value= &str_value;
  return Item::check_well_formed_result(own_value, send_error);
}
private: private:
bool m_cs_specified; bool m_cs_specified;
}; };
...@@ -2749,7 +2811,17 @@ public: ...@@ -2749,7 +2811,17 @@ public:
Derivation dv= DERIVATION_COERCIBLE) Derivation dv= DERIVATION_COERCIBLE)
:Item_string(NullS, str, length, cs, dv), func_name(name_par) :Item_string(NullS, str, length, cs, dv), func_name(name_par)
{} {}
Item *safe_charset_converter(CHARSET_INFO *tocs); Item_static_string_func(const char *name_par,
const String *str,
CHARSET_INFO *tocs, uint *conv_errors,
Derivation dv, uint repertoire)
:Item_string(str, tocs, conv_errors, dv, repertoire),
func_name(name_par)
{}
Item *safe_charset_converter(CHARSET_INFO *tocs)
{
  /*
    Delegate to the shared constant converter in lossless mode,
    passing func_name along to it.
  */
  const bool lossless= true;
  return const_charset_converter(tocs, lossless, func_name);
}
virtual inline void print(String *str, enum_query_type query_type) virtual inline void print(String *str, enum_query_type query_type)
{ {
...@@ -2852,7 +2924,10 @@ public: ...@@ -2852,7 +2924,10 @@ public:
enum Type type() const { return VARBIN_ITEM; } enum Type type() const { return VARBIN_ITEM; }
enum Item_result result_type () const { return STRING_RESULT; } enum Item_result result_type () const { return STRING_RESULT; }
enum_field_types field_type() const { return MYSQL_TYPE_VARCHAR; } enum_field_types field_type() const { return MYSQL_TYPE_VARCHAR; }
virtual Item *safe_charset_converter(CHARSET_INFO *tocs); virtual Item *safe_charset_converter(CHARSET_INFO *tocs)
{
return const_charset_converter(tocs, true);
}
bool check_partition_func_processor(uchar *int_arg) {return FALSE;} bool check_partition_func_processor(uchar *int_arg) {return FALSE;}
bool check_vcol_func_processor(uchar *arg) { return FALSE;} bool check_vcol_func_processor(uchar *arg) { return FALSE;}
bool basic_const_item() const { return 1; } bool basic_const_item() const { return 1; }
......
...@@ -2328,32 +2328,6 @@ void Item_func_decode::crypto_transform(String *res) ...@@ -2328,32 +2328,6 @@ void Item_func_decode::crypto_transform(String *res)
} }
Item *Item_func_sysconst::safe_charset_converter(CHARSET_INFO *tocs)
{
Item_string *conv;
uint conv_errors;
String tmp, cstr, *ostr= val_str(&tmp);
if (null_value)
{
Item *null_item= new Item_null((char *) fully_qualified_func_name());
null_item->collation.set (tocs);
return null_item;
}
cstr.copy(ostr->ptr(), ostr->length(), ostr->charset(), tocs, &conv_errors);
if (conv_errors ||
!(conv= new Item_static_string_func(fully_qualified_func_name(),
cstr.ptr(), cstr.length(),
cstr.charset(),
collation.derivation)))
{
return NULL;
}
conv->str_value.copy();
conv->str_value.mark_as_const();
return conv;
}
String *Item_func_database::val_str(String *str) String *Item_func_database::val_str(String *str)
{ {
DBUG_ASSERT(fixed == 1); DBUG_ASSERT(fixed == 1);
......
...@@ -542,7 +542,10 @@ class Item_func_sysconst :public Item_str_func ...@@ -542,7 +542,10 @@ class Item_func_sysconst :public Item_str_func
public: public:
Item_func_sysconst() Item_func_sysconst()
{ collation.set(system_charset_info,DERIVATION_SYSCONST); } { collation.set(system_charset_info,DERIVATION_SYSCONST); }
Item *safe_charset_converter(CHARSET_INFO *tocs); Item *safe_charset_converter(CHARSET_INFO *tocs)
{
return const_charset_converter(tocs, true, fully_qualified_func_name());
}
/* /*
Used to create correct Item name in new converted item in Used to create correct Item name in new converted item in
safe_charset_converter, return string representation of this function safe_charset_converter, return string representation of this function
......
...@@ -532,6 +532,32 @@ public: ...@@ -532,6 +532,32 @@ public:
}; };
/**
  An Item_string whose value may be replaced during execution,
  and which therefore reports itself as non-constant.
*/
class Item_string_xml_non_const: public Item_string
{
public:
  Item_string_xml_non_const(const char *str, uint length, CHARSET_INFO *cs)
   :Item_string(str, length, cs)
  {}

  /* The value can change, so the item is never reported as a constant. */
  bool const_item() const
  {
    return false;
  }
  bool basic_const_item() const
  {
    return false;
  }

  /* Replace the current string value. */
  void set_value(const char *str, uint length, CHARSET_INFO *cs)
  {
    str_value.set(str, length, cs);
  }

  /*
    Item_string::safe_charset_converter() does not accept non-constants,
    so the item is returned unchanged.
    Note, conversion is not really needed here anyway.
  */
  Item *safe_charset_converter(CHARSET_INFO *tocs)
  {
    return this;
  }
};
class Item_nodeset_to_const_comparator :public Item_bool_func class Item_nodeset_to_const_comparator :public Item_bool_func
{ {
String *pxml; String *pxml;
...@@ -550,7 +576,8 @@ public: ...@@ -550,7 +576,8 @@ public:
longlong val_int() longlong val_int()
{ {
Item_func *comp= (Item_func*)args[1]; Item_func *comp= (Item_func*)args[1];
Item_string *fake= (Item_string*)(comp->arguments()[0]); Item_string_xml_non_const *fake=
(Item_string_xml_non_const*)(comp->arguments()[0]);
String *res= args[0]->val_nodeset(&tmp_nodeset); String *res= args[0]->val_nodeset(&tmp_nodeset);
MY_XPATH_FLT *fltbeg= (MY_XPATH_FLT*) res->ptr(); MY_XPATH_FLT *fltbeg= (MY_XPATH_FLT*) res->ptr();
MY_XPATH_FLT *fltend= (MY_XPATH_FLT*) (res->ptr() + res->length()); MY_XPATH_FLT *fltend= (MY_XPATH_FLT*) (res->ptr() + res->length());
...@@ -568,8 +595,8 @@ public: ...@@ -568,8 +595,8 @@ public:
if ((node->parent == flt->num) && if ((node->parent == flt->num) &&
(node->type == MY_XML_NODE_TEXT)) (node->type == MY_XML_NODE_TEXT))
{ {
fake->str_value.set(node->beg, node->end - node->beg, fake->set_value(node->beg, node->end - node->beg,
collation.collation); collation.collation);
if (args[1]->val_int()) if (args[1]->val_int())
return 1; return 1;
} }
...@@ -956,14 +983,12 @@ static Item *create_comparator(MY_XPATH *xpath, ...@@ -956,14 +983,12 @@ static Item *create_comparator(MY_XPATH *xpath,
{ {
/* /*
Compare a node set to a scalar value. Compare a node set to a scalar value.
We just create a fake Item_string() argument, We just create a fake Item_string_xml_non_const() argument,
which will be filled to the particular value which will be filled to the particular value
in a loop through all of the nodes in the node set. in a loop through all of the nodes in the node set.
*/ */
Item_string *fake= new Item_string("", 0, xpath->cs); Item_string *fake= new Item_string_xml_non_const("", 0, xpath->cs);
/* Don't cache fake because its value will be changed during comparison.*/
fake->set_used_tables(RAND_TABLE_BIT);
Item_nodeset_func *nodeset; Item_nodeset_func *nodeset;
Item *scalar, *comp; Item *scalar, *comp;
if (a->type() == Item::XPATH_NODESET) if (a->type() == Item::XPATH_NODESET)
......
...@@ -267,9 +267,11 @@ Condition_information_item::make_utf8_string_item(THD *thd, const String *str) ...@@ -267,9 +267,11 @@ Condition_information_item::make_utf8_string_item(THD *thd, const String *str)
CHARSET_INFO *to_cs= &my_charset_utf8_general_ci; CHARSET_INFO *to_cs= &my_charset_utf8_general_ci;
/* If a charset was not set, assume that no conversion is needed. */ /* If a charset was not set, assume that no conversion is needed. */
CHARSET_INFO *from_cs= str->charset() ? str->charset() : to_cs; CHARSET_INFO *from_cs= str->charset() ? str->charset() : to_cs;
Item_string *item= new Item_string(str->ptr(), str->length(), from_cs); String tmp(str->ptr(), str->length(), from_cs);
/* If necessary, convert the string (ignoring errors), then copy it over. */ /* If necessary, convert the string (ignoring errors), then copy it over. */
return item ? item->charset_converter(to_cs, false) : NULL; uint conv_errors;
return new Item_string(&tmp, to_cs, &conv_errors,
DERIVATION_COERCIBLE, MY_REPERTOIRE_UNICODE30);
} }
......
...@@ -351,6 +351,10 @@ public: ...@@ -351,6 +351,10 @@ public:
bool set_or_copy_aligned(const char *s, uint32 arg_length, CHARSET_INFO *cs); bool set_or_copy_aligned(const char *s, uint32 arg_length, CHARSET_INFO *cs);
bool copy(const char*s,uint32 arg_length, CHARSET_INFO *csfrom, bool copy(const char*s,uint32 arg_length, CHARSET_INFO *csfrom,
CHARSET_INFO *csto, uint *errors); CHARSET_INFO *csto, uint *errors);
bool copy(const String *str, CHARSET_INFO *tocs, uint *errors)
{
return copy(str->ptr(), str->length(), str->charset(), tocs, errors);
}
void move(String &s) void move(String &s)
{ {
free(); free();
...@@ -517,6 +521,12 @@ public: ...@@ -517,6 +521,12 @@ public:
{ {
return (s->alloced && Ptr >= s->Ptr && Ptr < s->Ptr + s->str_length); return (s->alloced && Ptr >= s->Ptr && Ptr < s->Ptr + s->str_length);
} }
uint well_formed_length() const
{
  /*
    Ask the character set handler for the well-formed length of the
    whole buffer. The error code it reports is discarded.
  */
  CHARSET_INFO *cs= charset();
  const char *begin= ptr();
  const char *end= begin + length();
  int ignored_error;
  return cs->cset->well_formed_len(cs, begin, end, length(), &ignored_error);
}
bool is_ascii() const bool is_ascii() const
{ {
if (length() == 0) if (length() == 0)
......
...@@ -13432,7 +13432,7 @@ literal: ...@@ -13432,7 +13432,7 @@ literal:
str ? str->length() : 0, str ? str->length() : 0,
$1); $1);
if (!item_str || if (!item_str ||
!item_str->check_well_formed_result(&item_str->str_value, TRUE)) !item_str->check_well_formed_result(true))
{ {
MYSQL_YYABORT; MYSQL_YYABORT;
} }
...@@ -13461,7 +13461,7 @@ literal: ...@@ -13461,7 +13461,7 @@ literal:
str ? str->length() : 0, str ? str->length() : 0,
$1); $1);
if (!item_str || if (!item_str ||
!item_str->check_well_formed_result(&item_str->str_value, TRUE)) !item_str->check_well_formed_result(true))
{ {
MYSQL_YYABORT; MYSQL_YYABORT;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment