Bug#18908: ERROR 1406 (22001): Data too long for column :: using utf8

Problem: Too confusing error message when cannot convert
between string and column character sets on INSERT and UPDATE.
Fix: producing a better error message, instead of "Data too long"
in such cases
Additional changes: Adding "DROP TABLE IF EXISTS" into several
tests to be safe against failures in previous tests. 
parent bbf84d6b
......@@ -2,3 +2,4 @@
44ec850ac2k4y2Omgr92GiWPBAVKGQ
44edb86b1iE5knJ97MbliK_3lCiAXA
44f33f3aj5KW5qweQeekY1LU0E9ZCg
452c6c6dAjuNghfc1ObZ_UQ5SCl85g
......@@ -2,6 +2,10 @@
# Test for strict-mode autoincrement
#
--disable_warnings
drop table if exists t1;
--enable_warnings
set @org_mode=@@sql_mode;
eval create table t1
(
......
......@@ -171,8 +171,8 @@ create table t1 (a char(10) character set koi8r, b text character set koi8r);
insert into t1 values ('test','test');
insert into t1 values ('','');
Warnings:
Warning 1265 Data truncated for column 'a' at row 1
Warning 1265 Data truncated for column 'b' at row 1
Warning 1366 Incorrect string value: '\xCA\xC3\xD5\xCB' for column 'a' at row 1
Warning 1366 Incorrect string value: '\xCA\xC3\xD5\xCB' for column 'b' at row 1
drop table t1;
set names koi8r;
create table t1 (a char(10) character set cp1251);
......
......@@ -197,7 +197,7 @@ drop table t1;
create table t1 (s1 char(10) character set utf8);
insert into t1 values (0x41FF);
Warnings:
Warning 1265 Data truncated for column 's1' at row 1
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
select hex(s1) from t1;
hex(s1)
41
......@@ -205,7 +205,7 @@ drop table t1;
create table t1 (s1 varchar(10) character set utf8);
insert into t1 values (0x41FF);
Warnings:
Warning 1265 Data truncated for column 's1' at row 1
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
select hex(s1) from t1;
hex(s1)
41
......@@ -213,7 +213,7 @@ drop table t1;
create table t1 (s1 text character set utf8);
insert into t1 values (0x41FF);
Warnings:
Warning 1265 Data truncated for column 's1' at row 1
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
select hex(s1) from t1;
hex(s1)
41
......
......@@ -372,10 +372,10 @@ t collation(t) FORMAT(MATCH t AGAINST ('Osnabruck'),6)
aus Osnabrck utf8_general_ci 1.591140
alter table t1 modify t varchar(200) collate latin1_german2_ci not null;
Warnings:
Warning 1265 Data truncated for column 't' at row 3
Warning 1265 Data truncated for column 't' at row 4
Warning 1265 Data truncated for column 't' at row 5
Warning 1265 Data truncated for column 't' at row 6
Warning 1366 Incorrect string value: '\xD0\xAD\xD1\x82\xD0\xBE...' for column 't' at row 3
Warning 1366 Incorrect string value: '\xD0\x9E\xD1\x82\xD0\xBB...' for column 't' at row 4
Warning 1366 Incorrect string value: '\xD0\x9D\xD0\xB5 \xD0...' for column 't' at row 5
Warning 1366 Incorrect string value: '\xD0\xB8 \xD0\xB1\xD1...' for column 't' at row 6
SELECT t, collation(t) FROM t1 WHERE MATCH t AGAINST ('Osnabrck');
t collation(t)
aus Osnabrck latin1_german2_ci
......
drop table if exists t1;
set @org_mode=@@sql_mode;
create table t1
(
......
drop table if exists t1;
set @org_mode=@@sql_mode;
create table t1
(
......
drop table if exists t1;
set @org_mode=@@sql_mode;
create table t1
(
......
drop table if exists t1;
set @org_mode=@@sql_mode;
create table t1
(
......
drop table if exists t1;
set @org_mode=@@sql_mode;
create table t1
(
......
......@@ -5799,37 +5799,148 @@ void Field_datetime::sql_type(String &res) const
** A string may be varchar or binary
****************************************************************************/
/*
Report "not well formed" or "cannot convert" error
after storing a character string info a field.
SYNOPSIS
check_string_copy_error()
field - Field
well_formed_error_pos - where not well formed data was first met
cannot_convert_error_pos - where a not-convertable character was first met
end - end of the string
NOTES
As of version 5.0 both cases return the same error:
"Invalid string value: 'xxx' for column 't' at row 1"
Future versions will possibly introduce a new error message:
"Cannot convert character string: 'xxx' for column 't' at row 1"
RETURN
FALSE - If errors didn't happen
TRUE - If an error happened
*/
static bool
check_string_copy_error(Field_str *field,
const char *well_formed_error_pos,
const char *cannot_convert_error_pos,
const char *end)
{
const char *pos, *end_orig;
char tmp[64], *t;
if (!(pos= well_formed_error_pos) &&
!(pos= cannot_convert_error_pos))
return FALSE;
end_orig= end;
set_if_smaller(end, pos + 6);
for (t= tmp; pos < end; pos++)
{
if (((unsigned char) *pos) >= 0x20 &&
((unsigned char) *pos) <= 0x7F)
{
*t++= *pos;
}
else
{
*t++= '\\';
*t++= 'x';
*t++= _dig_vec_upper[((unsigned char) *pos) >> 4];
*t++= _dig_vec_upper[((unsigned char) *pos) & 15];
}
}
if (end_orig > end)
{
*t++= '.';
*t++= '.';
*t++= '.';
}
*t= '\0';
push_warning_printf(field->table->in_use,
field->table->in_use->abort_on_warning ?
MYSQL_ERROR::WARN_LEVEL_ERROR :
MYSQL_ERROR::WARN_LEVEL_WARN,
ER_TRUNCATED_WRONG_VALUE_FOR_FIELD,
ER(ER_TRUNCATED_WRONG_VALUE_FOR_FIELD),
"string", tmp, field->field_name,
(ulong) field->table->in_use->row_count);
return TRUE;
}
/*
Send a truncation warning or a truncation error
after storing a too long character string info a field.
SYNOPSIS
report_data_too_long()
field - Field
RETURN
N/A
*/
inline void
report_data_too_long(Field_str *field)
{
if (field->table->in_use->abort_on_warning)
field->set_warning(MYSQL_ERROR::WARN_LEVEL_ERROR, ER_DATA_TOO_LONG, 1);
else
field->set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, WARN_DATA_TRUNCATED, 1);
}
/*
Test if the given string contains important data:
not spaces for character string,
or any data for binary string.
SYNOPSIS
test_if_important_data()
cs Character set
str String to test
strend String end
RETURN
FALSE - If string does not have important data
TRUE - If string has some important data
*/
static bool
test_if_important_data(CHARSET_INFO *cs, const char *str, const char *strend)
{
if (cs != &my_charset_bin)
str+= cs->cset->scan(cs, str, strend, MY_SEQ_SPACES);
return (str < strend);
}
/* Copy a string and fill with space */
int Field_string::store(const char *from,uint length,CHARSET_INFO *cs)
{
int error= 0, well_formed_error;
uint32 not_used;
char buff[STRING_BUFFER_USUAL_SIZE];
String tmpstr(buff,sizeof(buff), &my_charset_bin);
uint copy_length;
const char *well_formed_error_pos;
const char *cannot_convert_error_pos;
const char *from_end_pos;
/* See the comment for Field_long::store(long long) */
DBUG_ASSERT(table->in_use == current_thd);
/* Convert character set if necessary */
if (String::needs_conversion(length, cs, field_charset, &not_used))
{
uint conv_errors;
tmpstr.copy(from, length, cs, field_charset, &conv_errors);
from= tmpstr.ptr();
length= tmpstr.length();
if (conv_errors)
error= 2;
}
/* Make sure we don't break a multibyte sequence or copy malformed data. */
copy_length= field_charset->cset->well_formed_len(field_charset,
from,from+length,
field_length/
field_charset->mbmaxlen,
&well_formed_error);
memmove(ptr, from, copy_length);
copy_length= well_formed_copy_nchars(field_charset,
ptr, field_length,
cs, from, length,
field_length / field_charset->mbmaxlen,
&well_formed_error_pos,
&cannot_convert_error_pos,
&from_end_pos);
/* Append spaces if the string was shorter than the field. */
if (copy_length < field_length)
......@@ -5837,32 +5948,23 @@ int Field_string::store(const char *from,uint length,CHARSET_INFO *cs)
field_length-copy_length,
field_charset->pad_char);
if (check_string_copy_error(this, well_formed_error_pos,
cannot_convert_error_pos, from + length))
return 2;
/*
Check if we lost any important data (anything in a binary string,
or any non-space in others).
*/
if ((copy_length < length) && table->in_use->count_cuted_fields)
if ((from_end_pos < from + length) && table->in_use->count_cuted_fields)
{
if (binary())
error= 2;
else
if (test_if_important_data(field_charset, from_end_pos, from + length))
{
const char *end=from+length;
from+= copy_length;
from+= field_charset->cset->scan(field_charset, from, end,
MY_SEQ_SPACES);
if (from != end)
error= 2;
}
report_data_too_long(this);
return 2;
}
if (error)
{
if (table->in_use->abort_on_warning)
set_warning(MYSQL_ERROR::WARN_LEVEL_ERROR, ER_DATA_TOO_LONG, 1);
else
set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, WARN_DATA_TRUNCATED, 1);
}
return error;
return 0;
}
......@@ -6195,58 +6297,35 @@ Field *Field_string::new_field(MEM_ROOT *root, struct st_table *new_table,
int Field_varstring::store(const char *from,uint length,CHARSET_INFO *cs)
{
uint32 not_used, copy_length;
char buff[STRING_BUFFER_USUAL_SIZE];
String tmpstr(buff,sizeof(buff), &my_charset_bin);
int error_code= 0, well_formed_error;
enum MYSQL_ERROR::enum_warning_level level= MYSQL_ERROR::WARN_LEVEL_WARN;
uint copy_length;
const char *well_formed_error_pos;
const char *cannot_convert_error_pos;
const char *from_end_pos;
copy_length= well_formed_copy_nchars(field_charset,
ptr + length_bytes, field_length,
cs, from, length,
field_length / field_charset->mbmaxlen,
&well_formed_error_pos,
&cannot_convert_error_pos,
&from_end_pos);
/* Convert character set if necessary */
if (String::needs_conversion(length, cs, field_charset, &not_used))
{
uint conv_errors;
tmpstr.copy(from, length, cs, field_charset, &conv_errors);
from= tmpstr.ptr();
length= tmpstr.length();
if (conv_errors)
error_code= WARN_DATA_TRUNCATED;
}
/*
Make sure we don't break a multibyte sequence
as well as don't copy a malformed data.
*/
copy_length= field_charset->cset->well_formed_len(field_charset,
from,from+length,
field_length/
field_charset->mbmaxlen,
&well_formed_error);
memmove(ptr + length_bytes, from, copy_length);
if (length_bytes == 1)
*ptr= (uchar) copy_length;
else
int2store(ptr, copy_length);
if (check_string_copy_error(this, well_formed_error_pos,
cannot_convert_error_pos, from + length))
return 2;
// Check if we lost something other than just trailing spaces
if ((copy_length < length) && table->in_use->count_cuted_fields &&
!error_code)
{
if (!binary())
{
const char *end= from + length;
from+= copy_length;
from+= field_charset->cset->scan(field_charset, from, end, MY_SEQ_SPACES);
/* If we lost only spaces then produce a NOTE, not a WARNING */
if (from == end)
level= MYSQL_ERROR::WARN_LEVEL_NOTE;
}
error_code= WARN_DATA_TRUNCATED;
}
if (error_code)
if ((from_end_pos < from + length) && table->in_use->count_cuted_fields)
{
if (level == MYSQL_ERROR::WARN_LEVEL_WARN &&
table->in_use->abort_on_warning)
error_code= ER_DATA_TOO_LONG;
set_warning(level, error_code, 1);
if (test_if_important_data(field_charset, from_end_pos, from + length))
report_data_too_long(this);
else /* If we lost only spaces then produce a NOTE, not a WARNING */
set_warning(MYSQL_ERROR::WARN_LEVEL_NOTE, WARN_DATA_TRUNCATED, 1);
return 2;
}
return 0;
......@@ -6828,68 +6907,70 @@ void Field_blob::put_length(char *pos, uint32 length)
int Field_blob::store(const char *from,uint length,CHARSET_INFO *cs)
{
int error= 0, well_formed_error;
uint copy_length, new_length;
const char *well_formed_error_pos;
const char *cannot_convert_error_pos;
const char *from_end_pos, *tmp;
char buff[STRING_BUFFER_USUAL_SIZE];
String tmpstr(buff,sizeof(buff), &my_charset_bin);
if (!length)
{
bzero(ptr,Field_blob::pack_length());
return 0;
}
else
{
bool was_conversion;
char buff[STRING_BUFFER_USUAL_SIZE];
String tmpstr(buff,sizeof(buff), &my_charset_bin);
uint copy_length;
uint32 not_used;
/* Convert character set if necessary */
if ((was_conversion= String::needs_conversion(length, cs, field_charset,
&not_used)))
if (from == value.ptr())
{
uint conv_errors;
if (tmpstr.copy(from, length, cs, field_charset, &conv_errors))
uint32 dummy_offset;
if (!String::needs_conversion(length, cs, field_charset, &dummy_offset))
{
/* Fatal OOM error */
bzero(ptr,Field_blob::pack_length());
return -1;
Field_blob::store_length(length);
bmove(ptr+packlength,(char*) &from,sizeof(char*));
return 0;
}
if (tmpstr.copy(from, length, cs))
goto oom_error;
from= tmpstr.ptr();
length= tmpstr.length();
if (conv_errors)
error= 2;
}
copy_length= max_data_length();
new_length= min(max_data_length(), field_charset->mbmaxlen * length);
if (value.alloc(new_length))
goto oom_error;
/*
copy_length is OK as last argument to well_formed_len as this is never
used to limit the length of the data. The cut of long data is done with
the 'min()' call below.
"length" is OK as "nchars" argument to well_formed_copy_nchars as this
is never used to limit the length of the data. The cut of long data
is done with the new_length value.
*/
copy_length= field_charset->cset->well_formed_len(field_charset,
from,from +
min(length, copy_length),
copy_length,
&well_formed_error);
if (copy_length < length)
error= 2;
copy_length= well_formed_copy_nchars(field_charset,
(char*) value.ptr(), new_length,
cs, from, length,
length,
&well_formed_error_pos,
&cannot_convert_error_pos,
&from_end_pos);
Field_blob::store_length(copy_length);
if (was_conversion || table->copy_blobs || copy_length <= MAX_FIELD_WIDTH)
{ // Must make a copy
if (from != value.ptr()) // For valgrind
{
value.copy(from,copy_length,charset());
from=value.ptr();
}
}
bmove(ptr+packlength,(char*) &from,sizeof(char*));
}
if (error)
tmp= value.ptr();
bmove(ptr+packlength,(char*) &tmp,sizeof(char*));
if (check_string_copy_error(this, well_formed_error_pos,
cannot_convert_error_pos, from + length))
return 2;
if (copy_length < length)
{
if (table->in_use->abort_on_warning)
set_warning(MYSQL_ERROR::WARN_LEVEL_ERROR, ER_DATA_TOO_LONG, 1);
else
set_warning(MYSQL_ERROR::WARN_LEVEL_WARN, WARN_DATA_TRUNCATED, 1);
report_data_too_long(this);
return 2;
}
return 0;
oom_error:
/* Fatal OOM error */
bzero(ptr,Field_blob::pack_length());
return -1;
}
......
......@@ -854,6 +854,162 @@ outp:
}
/*
copy a string,
with optional character set conversion,
with optional left padding (for binary -> UCS2 conversion)
SYNOPSIS
well_formed_copy_nhars()
to Store result here
to_length Maxinum length of "to" string
to_cs Character set of "to" string
from Copy from here
from_length Length of from string
from_cs From character set
nchars Copy not more that nchars characters
well_formed_error_pos Return position when "from" is not well formed
or NULL otherwise.
cannot_convert_error_pos Return position where a not convertable
character met, or NULL otherwise.
from_end_pos Return position where scanning of "from"
string stopped.
NOTES
RETURN
length of bytes copied to 'to'
*/
uint32
well_formed_copy_nchars(CHARSET_INFO *to_cs,
char *to, uint to_length,
CHARSET_INFO *from_cs,
const char *from, uint from_length,
uint nchars,
const char **well_formed_error_pos,
const char **cannot_convert_error_pos,
const char **from_end_pos)
{
uint res;
if ((to_cs == &my_charset_bin) ||
(from_cs == &my_charset_bin) ||
(to_cs == from_cs) ||
my_charset_same(from_cs, to_cs))
{
if (to_length < to_cs->mbminlen || !nchars)
{
*from_end_pos= from;
*cannot_convert_error_pos= NULL;
*well_formed_error_pos= NULL;
return 0;
}
if (to_cs == &my_charset_bin)
{
res= min(min(nchars, to_length), from_length);
memmove(to, from, res);
*from_end_pos= from + res;
*well_formed_error_pos= NULL;
*cannot_convert_error_pos= NULL;
}
else
{
int well_formed_error;
uint from_offset;
if ((from_offset= (from_length % to_cs->mbminlen)) &&
(from_cs == &my_charset_bin))
{
/*
Copying from BINARY to UCS2 needs to prepend zeros sometimes:
INSERT INTO t1 (ucs2_column) VALUES (0x01);
0x01 -> 0x0001
*/
uint pad_length= to_cs->mbminlen - from_offset;
bzero(to, pad_length);
memmove(to + pad_length, from, from_offset);
nchars--;
from+= from_offset;
from_length-= from_offset;
to+= to_cs->mbminlen;
to_length-= to_cs->mbminlen;
}
set_if_smaller(from_length, to_length);
res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
nchars, &well_formed_error);
memmove(to, from, res);
*from_end_pos= from + res;
*well_formed_error_pos= well_formed_error ? from + res : NULL;
*cannot_convert_error_pos= NULL;
if (from_offset)
res+= to_cs->mbminlen;
}
}
else
{
int cnvres;
my_wc_t wc;
int (*mb_wc)(struct charset_info_st *, my_wc_t *,
const uchar *, const uchar *)= from_cs->cset->mb_wc;
int (*wc_mb)(struct charset_info_st *, my_wc_t,
uchar *s, uchar *e)= to_cs->cset->wc_mb;
const uchar *from_end= (const uchar*) from + from_length;
uchar *to_end= (uchar*) to + to_length;
char *to_start= to;
*well_formed_error_pos= NULL;
*cannot_convert_error_pos= NULL;
for ( ; nchars; nchars--)
{
const char *from_prev= from;
if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from, from_end)) > 0)
from+= cnvres;
else if (cnvres == MY_CS_ILSEQ)
{
if (!*well_formed_error_pos)
*well_formed_error_pos= from;
from++;
wc= '?';
}
else if (cnvres > MY_CS_TOOSMALL)
{
/*
A correct multibyte sequence detected
But it doesn't have Unicode mapping.
*/
if (!*cannot_convert_error_pos)
*cannot_convert_error_pos= from;
from+= (-cnvres);
wc= '?';
}
else
break; // Not enough characters
outp:
if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
to+= cnvres;
else if (cnvres == MY_CS_ILUNI && wc != '?')
{
if (!*cannot_convert_error_pos)
*cannot_convert_error_pos= from_prev;
wc= '?';
goto outp;
}
else
break;
}
*from_end_pos= from;
res= to - to_start;
}
return (uint32) res;
}
void String::print(String *str)
{
char *st= (char*)Ptr, *end= st+str_length;
......
......@@ -32,6 +32,14 @@ String *copy_if_not_alloced(String *a,String *b,uint32 arg_length);
uint32 copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
const char *from, uint32 from_length,
CHARSET_INFO *from_cs, uint *errors);
uint32 well_formed_copy_nchars(CHARSET_INFO *to_cs,
char *to, uint to_length,
CHARSET_INFO *from_cs,
const char *from, uint from_length,
uint nchars,
const char **well_formed_error_pos,
const char **cannot_convert_error_pos,
const char **from_end_pos);
class String
{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment