diff --git a/mysql-test/r/ctype_recoding.result b/mysql-test/r/ctype_recoding.result index 72d19885101b6e35fc3674d44ec85d3df29d5d64..dc1f4c12e25cc595986eaf3d611b0e7bbde83198 100644 --- a/mysql-test/r/ctype_recoding.result +++ b/mysql-test/r/ctype_recoding.result @@ -174,3 +174,15 @@ Warnings: Warning 1265 Data truncated for column 'a' at row 1 Warning 1265 Data truncated for column 'b' at row 1 drop table t1; +set names koi8r; +create table t1 (a char(10) character set cp1251); +insert into t1 values (_koi8r'琢友'); +select * from t1 where a=_koi8r'琢友'; +a +琢友 +select * from t1 where a=concat(_koi8r'琢友'); +ERROR HY000: Illegal mix of collations (cp1251_general_ci,IMPLICIT) and (koi8r_general_ci,COERCIBLE) for operation '=' +select * from t1 where a=_latin1'琢友'; +ERROR HY000: Illegal mix of collations (cp1251_general_ci,IMPLICIT) and (latin1_swedish_ci,COERCIBLE) for operation '=' +drop table t1; +set names latin1; diff --git a/mysql-test/t/ctype_recoding.test b/mysql-test/t/ctype_recoding.test index 5f417352d952b2b40eff4487f29ff22d56fde5ee..dab898e9f2c03b2a8d3b32abfd03358c52bab932 100644 --- a/mysql-test/t/ctype_recoding.test +++ b/mysql-test/t/ctype_recoding.test @@ -131,3 +131,25 @@ create table t1 (a char(10) character set koi8r, b text character set koi8r); insert into t1 values ('test','test'); insert into t1 values ('拭账','拭账'); drop table t1; + +# +# Try to apply an automatic conversion in some cases: +# E.g. when mixing a column to a string, the string +# is converted into the column character set. +# If conversion loses data, then error. Otherwise, +# the string is replaced by its converted representation +# +set names koi8r; +create table t1 (a char(10) character set cp1251); +insert into t1 values (_koi8r'琢友'); +# this is possible: +select * from t1 where a=_koi8r'琢友'; +# this is not possible, because we have a function, not just a constant: +--error 1267 +select * from t1 where a=concat(_koi8r'琢友'); +# this is not posible, cannot convert _latin1'琢友' into cp1251: +--error 1267 +select * from t1 where a=_latin1'琢友'; +drop table t1; +set names latin1; + diff --git a/sql/item.cc b/sql/item.cc index b4c416e7741cd4043f1ec6b9ac9b938a61eea111..18025d6d6891fb8aba051bb5d16d39acc40c6665 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -259,7 +259,43 @@ CHARSET_INFO *Item::default_charset() return current_thd->variables.collation_connection; } -bool DTCollation::aggregate(DTCollation &dt, bool superset_conversion) + +/* + Aggregate two collations together taking + into account their coercibility (aka derivation): + + 0 == DERIVATION_EXPLICIT - an explicitely written COLLATE clause + 1 == DERIVATION_NONE - a mix of two different collations + 2 == DERIVATION_IMPLICIT - a column + 3 == DERIVATION_COERCIBLE - a string constant + + The most important rules are: + + 1. If collations are the same: + chose this collation, and the strongest derivation. + + 2. If collations are different: + - Character sets may differ, but only if conversion without + data loss is possible. The caller provides flags whether + character set conversion attempts should be done. If no + flags are substituted, then the character sets must be the same. + Currently processed flags are: + MY_COLL_ALLOW_SUPERSET_CONV - allow conversion to a superset + MY_COLL_ALLOW_COERCIBLE_CONV - allow conversion of a coercible value + - two EXPLICIT collations produce an error, e.g. this is wrong: + CONCAT(expr1 collate latin1_swedish_ci, expr2 collate latin1_german_ci) + - the side with smaller derivation value wins, + i.e. a column is stronger than a string constant, + an explicit COLLATE clause is stronger than a column. + - if derivations are the same, we have DERIVATION_NONE, + we'll wait for an explicit COLLATE clause which possibly can + come from another argument later: for example, this is valid, + but we don't know yet when collecting the first two arguments: + CONCAT(latin1_swedish_ci_column, + latin1_german1_ci_column, + expr COLLATE latin1_german2_ci) +*/ +bool DTCollation::aggregate(DTCollation &dt, uint flags) { nagg++; if (!my_charset_same(collation, dt.collation)) @@ -290,28 +326,37 @@ bool DTCollation::aggregate(DTCollation &dt, bool superset_conversion) else ; // Do nothing } - else if (superset_conversion) + else if ((flags & MY_COLL_ALLOW_SUPERSET_CONV) && + derivation < dt.derivation && + collation->state & MY_CS_UNICODE) { - if (derivation < dt.derivation && - collation->state & MY_CS_UNICODE) - ; // Do nothing - else if (dt.derivation < derivation && - dt.collation->state & MY_CS_UNICODE) - { - set(dt); - strong= nagg; - } - else - { - // Cannot convert to superset - set(0, DERIVATION_NONE); - return 1; - } + // Do nothing + } + else if ((flags & MY_COLL_ALLOW_SUPERSET_CONV) && + dt.derivation < derivation && + dt.collation->state & MY_CS_UNICODE) + { + set(dt); + strong= nagg; + } + else if ((flags & MY_COLL_ALLOW_COERCIBLE_CONV) && + derivation < dt.derivation && + dt.derivation == DERIVATION_COERCIBLE) + { + // Do nothing; + } + else if ((flags & MY_COLL_ALLOW_COERCIBLE_CONV) && + dt.derivation < derivation && + derivation == DERIVATION_COERCIBLE) + { + set(dt); + strong= nagg; } else { + // Cannot apply conversion set(0, DERIVATION_NONE); - return 1; + return 1; } } else if (derivation < dt.derivation) diff --git a/sql/item.h b/sql/item.h index e22cef7ba7fb0e9f45885246a1a658d0330142ce..2c0c3306c44082ca325afa50dfa9bfe54c6b4bf2 100644 --- a/sql/item.h +++ b/sql/item.h @@ -37,6 +37,16 @@ enum Derivation DERIVATION_EXPLICIT= 0 }; +/* + Flags for collation aggregation modes: + allow conversion to a superset + allow conversion of a coercible value (i.e. constant). +*/ + +#define MY_COLL_ALLOW_SUPERSET_CONV 1 +#define MY_COLL_ALLOW_COERCIBLE_CONV 2 + + class DTCollation { public: CHARSET_INFO *collation; @@ -72,9 +82,9 @@ public: { collation= collation_arg; } void set(Derivation derivation_arg) { derivation= derivation_arg; } - bool aggregate(DTCollation &dt, bool superset_conversion= FALSE); - bool set(DTCollation &dt1, DTCollation &dt2, bool superset_conversion= FALSE) - { set(dt1); return aggregate(dt2, superset_conversion); } + bool aggregate(DTCollation &dt, uint flags= 0); + bool set(DTCollation &dt1, DTCollation &dt2, uint flags= 0) + { set(dt1); return aggregate(dt2, flags); } const char *derivation_name() const { switch(derivation) diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index c9396aaa67c7939f8b0ecb04217f3cede67039e4..dc0377c791f749cac40f2fb1b4b7eff1681df98a 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -174,62 +174,87 @@ void Item_bool_func2::fix_length_and_dec() return; /* - We allow to convert to Unicode character sets in some cases. + We allow to apply automatic character set conversion in some cases. The conditions when conversion is possible are: - arguments A and B have different charsets - A wins according to coercibility rules - - character set of A is superset for character set of B - + (i.e. a column is stronger than a string constant, + an explicit COLLATE clause is stronger than a column) + - character set of A is either superset for character set of B, + or B is a string constant which can be converted into the + character set of A without data loss. + If all of the above is true, then it's possible to convert B into the character set of A, and then compare according to the collation of A. */ - if (args[0] && args[1]) - { - uint strong= 0; - uint weak= 0; - uint32 dummy_offset; - DTCollation coll; - - if (args[0]->result_type() == STRING_RESULT && - args[1]->result_type() == STRING_RESULT && - String::needs_conversion(0, args[0]->collation.collation, - args[1]->collation.collation, - &dummy_offset) && - !coll.set(args[0]->collation, args[1]->collation, TRUE)) + uint32 dummy_offset; + DTCollation coll; + + if (args[0]->result_type() == STRING_RESULT && + args[1]->result_type() == STRING_RESULT && + String::needs_conversion(0, args[0]->collation.collation, + args[1]->collation.collation, + &dummy_offset) && + !coll.set(args[0]->collation, args[1]->collation, + MY_COLL_ALLOW_SUPERSET_CONV | + MY_COLL_ALLOW_COERCIBLE_CONV)) + { + Item* conv= 0; + Item_arena *arena= thd->current_arena, backup; + uint strong= coll.strong; + uint weak= strong ? 0 : 1; + /* + In case we're in statement prepare, create conversion item + in its memory: it will be reused on each execute. + */ + if (arena->is_stmt_prepare()) + thd->set_n_backup_item_arena(arena, &backup); + if (args[weak]->type() == STRING_ITEM) { - Item* conv= 0; - Item_arena *arena= thd->current_arena, backup; - strong= coll.strong; - weak= strong ? 0 : 1; - /* - In case we're in statement prepare, create conversion item - in its memory: it will be reused on each execute. - */ - if (arena->is_stmt_prepare()) - thd->set_n_backup_item_arena(arena, &backup); - if (args[weak]->type() == STRING_ITEM) + uint conv_errors; + String tmp, cstr, *ostr= args[weak]->val_str(&tmp); + cstr.copy(ostr->ptr(), ostr->length(), ostr->charset(), + args[strong]->collation.collation, &conv_errors); + if (conv_errors) { - String tmp, cstr; - String *ostr= args[weak]->val_str(&tmp); - cstr.copy(ostr->ptr(), ostr->length(), ostr->charset(), - args[strong]->collation.collation); - conv= new Item_string(cstr.ptr(),cstr.length(),cstr.charset(), - args[weak]->collation.derivation); - ((Item_string*)conv)->str_value.copy(); + /* + We could not convert a string into the character set + of the stronger side of the operation without data loss. + It can happen if we tried to combine a column with a string + constant, and the column charset does not cover all the + characters from the string. Operation cannot be done + correctly. Return an error. + */ + my_coll_agg_error(args[0]->collation, args[1]->collation, + func_name()); + return; } - else + conv= new Item_string(cstr.ptr(),cstr.length(),cstr.charset(), + args[weak]->collation.derivation); + ((Item_string*)conv)->str_value.copy(); + } + else + { + if (!(coll.collation->state & MY_CS_UNICODE)) { - conv= new Item_func_conv_charset(args[weak], - args[strong]->collation.collation); - conv->collation.set(args[weak]->collation.derivation); - conv->fix_fields(thd, 0, &conv); + /* + Don't allow automatic conversion to non-Unicode charsets, + as it potentially loses data. + */ + my_coll_agg_error(args[0]->collation, args[1]->collation, + func_name()); + return; } - if (arena->is_stmt_prepare()) - thd->restore_backup_item_arena(arena, &backup); - args[weak]= conv ? conv : args[weak]; + conv= new Item_func_conv_charset(args[weak], + args[strong]->collation.collation); + conv->collation.set(args[weak]->collation.derivation); + conv->fix_fields(thd, 0, &conv); } + if (arena->is_stmt_prepare()) + thd->restore_backup_item_arena(arena, &backup); + args[weak]= conv ? conv : args[weak]; } // Make a special case of compare with fields to get nicer DATE comparisons @@ -1782,14 +1807,13 @@ void Item_func_in::fix_length_and_dec() via creating Item_func_conv_charset(). */ - if (agg_arg_collations_for_comparison(cmp_collation, - args, arg_count, TRUE)) + if (agg_arg_collations_for_comparison(cmp_collation, args, arg_count, + MY_COLL_ALLOW_SUPERSET_CONV)) return; if ((!my_charset_same(args[0]->collation.collation, cmp_collation.collation) || !const_itm)) { - if (agg_arg_collations_for_comparison(cmp_collation, - args, arg_count, FALSE)) + if (agg_arg_collations_for_comparison(cmp_collation, args, arg_count)) return; } else diff --git a/sql/item_func.cc b/sql/item_func.cc index 879c0b36bdd48345a07dc6d183143c98a37547e5..3cb125d286893cdef2d54cafc9c1ef8ee418bd84 100644 --- a/sql/item_func.cc +++ b/sql/item_func.cc @@ -76,7 +76,7 @@ static void my_coll_agg_error(Item** args, uint count, const char *fname) bool Item_func::agg_arg_collations(DTCollation &c, Item **av, uint count, - bool allow_superset_conversion) + uint flags) { uint i; c.nagg= 0; @@ -84,7 +84,7 @@ bool Item_func::agg_arg_collations(DTCollation &c, Item **av, uint count, c.set(av[0]->collation); for (i= 1; i < count; i++) { - if (c.aggregate(av[i]->collation, allow_superset_conversion)) + if (c.aggregate(av[i]->collation, flags)) { my_coll_agg_error(av, count, func_name()); return TRUE; @@ -96,9 +96,9 @@ bool Item_func::agg_arg_collations(DTCollation &c, Item **av, uint count, bool Item_func::agg_arg_collations_for_comparison(DTCollation &c, Item **av, uint count, - bool allow_superset_conv) + uint flags) { - if (agg_arg_collations(c, av, count, allow_superset_conv)) + if (agg_arg_collations(c, av, count, flags)) return TRUE; if (c.derivation == DERIVATION_NONE) diff --git a/sql/item_func.h b/sql/item_func.h index 6e43d66a32d040c48bc34a4b8c40dc1438575b36..963038227a228cccaf44b453a232a6ddcf4a0631 100644 --- a/sql/item_func.h +++ b/sql/item_func.h @@ -141,10 +141,10 @@ public: Item *get_tmp_table_item(THD *thd); bool agg_arg_collations(DTCollation &c, Item **items, uint nitems, - bool allow_superset_conversion= FALSE); + uint flags= 0); bool agg_arg_collations_for_comparison(DTCollation &c, Item **items, uint nitems, - bool allow_superset_comversion= FALSE); + uint flags= 0); bool walk(Item_processor processor, byte *arg); };