Commit ea49a518 authored by bar@mysql.com's avatar bar@mysql.com

Allow conversion to a non-Unicode charset when mixing a string

constant with a column. The string is converted into the column
character set. If conversion doesn't lose data, then the operation
is possible. Otherwise, give an error, as it was earlier.
parent 8d620b93
...@@ -174,3 +174,15 @@ Warnings: ...@@ -174,3 +174,15 @@ Warnings:
Warning 1265 Data truncated for column 'a' at row 1 Warning 1265 Data truncated for column 'a' at row 1
Warning 1265 Data truncated for column 'b' at row 1 Warning 1265 Data truncated for column 'b' at row 1
drop table t1; drop table t1;
set names koi8r;
create table t1 (a char(10) character set cp1251);
insert into t1 values (_koi8r'');
select * from t1 where a=_koi8r'';
a
select * from t1 where a=concat(_koi8r'');
ERROR HY000: Illegal mix of collations (cp1251_general_ci,IMPLICIT) and (koi8r_general_ci,COERCIBLE) for operation '='
select * from t1 where a=_latin1'';
ERROR HY000: Illegal mix of collations (cp1251_general_ci,IMPLICIT) and (latin1_swedish_ci,COERCIBLE) for operation '='
drop table t1;
set names latin1;
...@@ -131,3 +131,25 @@ create table t1 (a char(10) character set koi8r, b text character set koi8r); ...@@ -131,3 +131,25 @@ create table t1 (a char(10) character set koi8r, b text character set koi8r);
insert into t1 values ('test','test'); insert into t1 values ('test','test');
insert into t1 values ('',''); insert into t1 values ('','');
drop table t1; drop table t1;
#
# Try to apply an automatic conversion in some cases:
# E.g. when mixing a column with a string, the string
# is converted into the column character set.
# If conversion loses data, then error. Otherwise,
# the string is replaced by its converted representation
#
set names koi8r;
create table t1 (a char(10) character set cp1251);
insert into t1 values (_koi8r'');
# this is possible:
select * from t1 where a=_koi8r'';
# this is not possible, because we have a function, not just a constant:
--error 1267
select * from t1 where a=concat(_koi8r'');
# this is not possible, cannot convert _latin1'' into cp1251:
--error 1267
select * from t1 where a=_latin1'';
drop table t1;
set names latin1;
...@@ -259,7 +259,43 @@ CHARSET_INFO *Item::default_charset() ...@@ -259,7 +259,43 @@ CHARSET_INFO *Item::default_charset()
return current_thd->variables.collation_connection; return current_thd->variables.collation_connection;
} }
bool DTCollation::aggregate(DTCollation &dt, bool superset_conversion)
/*
Aggregate two collations together taking
into account their coercibility (aka derivation):
0 == DERIVATION_EXPLICIT - an explicitly written COLLATE clause
1 == DERIVATION_NONE - a mix of two different collations
2 == DERIVATION_IMPLICIT - a column
3 == DERIVATION_COERCIBLE - a string constant
The most important rules are:
1. If collations are the same:
choose this collation, and the strongest derivation.
2. If collations are different:
- Character sets may differ, but only if conversion without
data loss is possible. The caller provides flags whether
character set conversion attempts should be done. If no
flags are supplied, then the character sets must be the same.
Currently processed flags are:
MY_COLL_ALLOW_SUPERSET_CONV - allow conversion to a superset
MY_COLL_ALLOW_COERCIBLE_CONV - allow conversion of a coercible value
- two EXPLICIT collations produce an error, e.g. this is wrong:
CONCAT(expr1 collate latin1_swedish_ci, expr2 collate latin1_german_ci)
- the side with smaller derivation value wins,
i.e. a column is stronger than a string constant,
an explicit COLLATE clause is stronger than a column.
- if derivations are the same, we have DERIVATION_NONE,
we'll wait for an explicit COLLATE clause which possibly can
come from another argument later: for example, this is valid,
but we don't know yet when collecting the first two arguments:
CONCAT(latin1_swedish_ci_column,
latin1_german1_ci_column,
expr COLLATE latin1_german2_ci)
*/
bool DTCollation::aggregate(DTCollation &dt, uint flags)
{ {
nagg++; nagg++;
if (!my_charset_same(collation, dt.collation)) if (!my_charset_same(collation, dt.collation))
...@@ -290,28 +326,37 @@ bool DTCollation::aggregate(DTCollation &dt, bool superset_conversion) ...@@ -290,28 +326,37 @@ bool DTCollation::aggregate(DTCollation &dt, bool superset_conversion)
else else
; // Do nothing ; // Do nothing
} }
else if (superset_conversion) else if ((flags & MY_COLL_ALLOW_SUPERSET_CONV) &&
derivation < dt.derivation &&
collation->state & MY_CS_UNICODE)
{ {
if (derivation < dt.derivation && // Do nothing
collation->state & MY_CS_UNICODE) }
; // Do nothing else if ((flags & MY_COLL_ALLOW_SUPERSET_CONV) &&
else if (dt.derivation < derivation && dt.derivation < derivation &&
dt.collation->state & MY_CS_UNICODE) dt.collation->state & MY_CS_UNICODE)
{ {
set(dt); set(dt);
strong= nagg; strong= nagg;
} }
else else if ((flags & MY_COLL_ALLOW_COERCIBLE_CONV) &&
{ derivation < dt.derivation &&
// Cannot convert to superset dt.derivation == DERIVATION_COERCIBLE)
set(0, DERIVATION_NONE); {
return 1; // Do nothing;
} }
else if ((flags & MY_COLL_ALLOW_COERCIBLE_CONV) &&
dt.derivation < derivation &&
derivation == DERIVATION_COERCIBLE)
{
set(dt);
strong= nagg;
} }
else else
{ {
// Cannot apply conversion
set(0, DERIVATION_NONE); set(0, DERIVATION_NONE);
return 1; return 1;
} }
} }
else if (derivation < dt.derivation) else if (derivation < dt.derivation)
......
...@@ -37,6 +37,16 @@ enum Derivation ...@@ -37,6 +37,16 @@ enum Derivation
DERIVATION_EXPLICIT= 0 DERIVATION_EXPLICIT= 0
}; };
/*
Flags for collation aggregation modes:
allow conversion to a superset
allow conversion of a coercible value (i.e. constant).
*/
#define MY_COLL_ALLOW_SUPERSET_CONV 1
#define MY_COLL_ALLOW_COERCIBLE_CONV 2
class DTCollation { class DTCollation {
public: public:
CHARSET_INFO *collation; CHARSET_INFO *collation;
...@@ -72,9 +82,9 @@ public: ...@@ -72,9 +82,9 @@ public:
{ collation= collation_arg; } { collation= collation_arg; }
void set(Derivation derivation_arg) void set(Derivation derivation_arg)
{ derivation= derivation_arg; } { derivation= derivation_arg; }
bool aggregate(DTCollation &dt, bool superset_conversion= FALSE); bool aggregate(DTCollation &dt, uint flags= 0);
bool set(DTCollation &dt1, DTCollation &dt2, bool superset_conversion= FALSE) bool set(DTCollation &dt1, DTCollation &dt2, uint flags= 0)
{ set(dt1); return aggregate(dt2, superset_conversion); } { set(dt1); return aggregate(dt2, flags); }
const char *derivation_name() const const char *derivation_name() const
{ {
switch(derivation) switch(derivation)
......
...@@ -174,62 +174,87 @@ void Item_bool_func2::fix_length_and_dec() ...@@ -174,62 +174,87 @@ void Item_bool_func2::fix_length_and_dec()
return; return;
/* /*
We allow to convert to Unicode character sets in some cases. We allow to apply automatic character set conversion in some cases.
The conditions when conversion is possible are: The conditions when conversion is possible are:
- arguments A and B have different charsets - arguments A and B have different charsets
- A wins according to coercibility rules - A wins according to coercibility rules
- character set of A is superset for character set of B (i.e. a column is stronger than a string constant,
an explicit COLLATE clause is stronger than a column)
- character set of A is either superset for character set of B,
or B is a string constant which can be converted into the
character set of A without data loss.
If all of the above is true, then it's possible to convert If all of the above is true, then it's possible to convert
B into the character set of A, and then compare according B into the character set of A, and then compare according
to the collation of A. to the collation of A.
*/ */
if (args[0] && args[1]) uint32 dummy_offset;
{ DTCollation coll;
uint strong= 0;
uint weak= 0; if (args[0]->result_type() == STRING_RESULT &&
uint32 dummy_offset; args[1]->result_type() == STRING_RESULT &&
DTCollation coll; String::needs_conversion(0, args[0]->collation.collation,
args[1]->collation.collation,
if (args[0]->result_type() == STRING_RESULT && &dummy_offset) &&
args[1]->result_type() == STRING_RESULT && !coll.set(args[0]->collation, args[1]->collation,
String::needs_conversion(0, args[0]->collation.collation, MY_COLL_ALLOW_SUPERSET_CONV |
args[1]->collation.collation, MY_COLL_ALLOW_COERCIBLE_CONV))
&dummy_offset) && {
!coll.set(args[0]->collation, args[1]->collation, TRUE)) Item* conv= 0;
Item_arena *arena= thd->current_arena, backup;
uint strong= coll.strong;
uint weak= strong ? 0 : 1;
/*
In case we're in statement prepare, create conversion item
in its memory: it will be reused on each execute.
*/
if (arena->is_stmt_prepare())
thd->set_n_backup_item_arena(arena, &backup);
if (args[weak]->type() == STRING_ITEM)
{ {
Item* conv= 0; uint conv_errors;
Item_arena *arena= thd->current_arena, backup; String tmp, cstr, *ostr= args[weak]->val_str(&tmp);
strong= coll.strong; cstr.copy(ostr->ptr(), ostr->length(), ostr->charset(),
weak= strong ? 0 : 1; args[strong]->collation.collation, &conv_errors);
/* if (conv_errors)
In case we're in statement prepare, create conversion item
in its memory: it will be reused on each execute.
*/
if (arena->is_stmt_prepare())
thd->set_n_backup_item_arena(arena, &backup);
if (args[weak]->type() == STRING_ITEM)
{ {
String tmp, cstr; /*
String *ostr= args[weak]->val_str(&tmp); We could not convert a string into the character set
cstr.copy(ostr->ptr(), ostr->length(), ostr->charset(), of the stronger side of the operation without data loss.
args[strong]->collation.collation); It can happen if we tried to combine a column with a string
conv= new Item_string(cstr.ptr(),cstr.length(),cstr.charset(), constant, and the column charset does not cover all the
args[weak]->collation.derivation); characters from the string. Operation cannot be done
((Item_string*)conv)->str_value.copy(); correctly. Return an error.
*/
my_coll_agg_error(args[0]->collation, args[1]->collation,
func_name());
return;
} }
else conv= new Item_string(cstr.ptr(),cstr.length(),cstr.charset(),
args[weak]->collation.derivation);
((Item_string*)conv)->str_value.copy();
}
else
{
if (!(coll.collation->state & MY_CS_UNICODE))
{ {
conv= new Item_func_conv_charset(args[weak], /*
args[strong]->collation.collation); Don't allow automatic conversion to non-Unicode charsets,
conv->collation.set(args[weak]->collation.derivation); as it potentially loses data.
conv->fix_fields(thd, 0, &conv); */
my_coll_agg_error(args[0]->collation, args[1]->collation,
func_name());
return;
} }
if (arena->is_stmt_prepare()) conv= new Item_func_conv_charset(args[weak],
thd->restore_backup_item_arena(arena, &backup); args[strong]->collation.collation);
args[weak]= conv ? conv : args[weak]; conv->collation.set(args[weak]->collation.derivation);
conv->fix_fields(thd, 0, &conv);
} }
if (arena->is_stmt_prepare())
thd->restore_backup_item_arena(arena, &backup);
args[weak]= conv ? conv : args[weak];
} }
// Make a special case of compare with fields to get nicer DATE comparisons // Make a special case of compare with fields to get nicer DATE comparisons
...@@ -1782,14 +1807,13 @@ void Item_func_in::fix_length_and_dec() ...@@ -1782,14 +1807,13 @@ void Item_func_in::fix_length_and_dec()
via creating Item_func_conv_charset(). via creating Item_func_conv_charset().
*/ */
if (agg_arg_collations_for_comparison(cmp_collation, if (agg_arg_collations_for_comparison(cmp_collation, args, arg_count,
args, arg_count, TRUE)) MY_COLL_ALLOW_SUPERSET_CONV))
return; return;
if ((!my_charset_same(args[0]->collation.collation, if ((!my_charset_same(args[0]->collation.collation,
cmp_collation.collation) || !const_itm)) cmp_collation.collation) || !const_itm))
{ {
if (agg_arg_collations_for_comparison(cmp_collation, if (agg_arg_collations_for_comparison(cmp_collation, args, arg_count))
args, arg_count, FALSE))
return; return;
} }
else else
......
...@@ -76,7 +76,7 @@ static void my_coll_agg_error(Item** args, uint count, const char *fname) ...@@ -76,7 +76,7 @@ static void my_coll_agg_error(Item** args, uint count, const char *fname)
bool Item_func::agg_arg_collations(DTCollation &c, Item **av, uint count, bool Item_func::agg_arg_collations(DTCollation &c, Item **av, uint count,
bool allow_superset_conversion) uint flags)
{ {
uint i; uint i;
c.nagg= 0; c.nagg= 0;
...@@ -84,7 +84,7 @@ bool Item_func::agg_arg_collations(DTCollation &c, Item **av, uint count, ...@@ -84,7 +84,7 @@ bool Item_func::agg_arg_collations(DTCollation &c, Item **av, uint count,
c.set(av[0]->collation); c.set(av[0]->collation);
for (i= 1; i < count; i++) for (i= 1; i < count; i++)
{ {
if (c.aggregate(av[i]->collation, allow_superset_conversion)) if (c.aggregate(av[i]->collation, flags))
{ {
my_coll_agg_error(av, count, func_name()); my_coll_agg_error(av, count, func_name());
return TRUE; return TRUE;
...@@ -96,9 +96,9 @@ bool Item_func::agg_arg_collations(DTCollation &c, Item **av, uint count, ...@@ -96,9 +96,9 @@ bool Item_func::agg_arg_collations(DTCollation &c, Item **av, uint count,
bool Item_func::agg_arg_collations_for_comparison(DTCollation &c, bool Item_func::agg_arg_collations_for_comparison(DTCollation &c,
Item **av, uint count, Item **av, uint count,
bool allow_superset_conv) uint flags)
{ {
if (agg_arg_collations(c, av, count, allow_superset_conv)) if (agg_arg_collations(c, av, count, flags))
return TRUE; return TRUE;
if (c.derivation == DERIVATION_NONE) if (c.derivation == DERIVATION_NONE)
......
...@@ -141,10 +141,10 @@ public: ...@@ -141,10 +141,10 @@ public:
Item *get_tmp_table_item(THD *thd); Item *get_tmp_table_item(THD *thd);
bool agg_arg_collations(DTCollation &c, Item **items, uint nitems, bool agg_arg_collations(DTCollation &c, Item **items, uint nitems,
bool allow_superset_conversion= FALSE); uint flags= 0);
bool agg_arg_collations_for_comparison(DTCollation &c, bool agg_arg_collations_for_comparison(DTCollation &c,
Item **items, uint nitems, Item **items, uint nitems,
bool allow_superset_comversion= FALSE); uint flags= 0);
bool walk(Item_processor processor, byte *arg); bool walk(Item_processor processor, byte *arg);
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment