Commit 72d7b12b authored by Alexander Barkov

Reducing duplicate code and simplifying well formed string copying

by adding a new class String_copier.

This is a pre-requisite patch for MDEV-6566 and MDEV-6572,
to avoid adding more similar code.
parent 2d01907c
......@@ -6262,10 +6262,9 @@ bool Field_datetimef::get_date(MYSQL_TIME *ltime, ulonglong fuzzydate)
SYNOPSIS
check_string_copy_error()
field - Field
well_formed_error_pos - where not well formed data was first met
cannot_convert_error_pos - where a not-convertable character was first met
end - end of the string
copier - the conversion status
end - the very end of the source string
that was just copied
cs - character set of the string
NOTES
......@@ -6282,30 +6281,25 @@ bool Field_datetimef::get_date(MYSQL_TIME *ltime, ulonglong fuzzydate)
TRUE - If an error happened
*/
static bool
check_string_copy_error(Field_str *field,
const char *well_formed_error_pos,
const char *cannot_convert_error_pos,
const char *end,
CHARSET_INFO *cs)
bool
Field_longstr::check_string_copy_error(const String_copier *copier,
const char *end,
CHARSET_INFO *cs)
{
const char *pos;
char tmp[32];
THD *thd;
thd= field->get_thd();
if (!(pos= well_formed_error_pos) &&
!(pos= cannot_convert_error_pos))
if (!(pos= copier->most_important_error_pos()))
return FALSE;
convert_to_printable(tmp, sizeof(tmp), pos, (end - pos), cs, 6);
THD *thd= get_thd();
push_warning_printf(thd,
Sql_condition::WARN_LEVEL_WARN,
ER_TRUNCATED_WRONG_VALUE_FOR_FIELD,
ER(ER_TRUNCATED_WRONG_VALUE_FOR_FIELD),
"string", tmp, field->field_name,
"string", tmp, field_name,
thd->get_stmt_da()->current_row_for_warning());
return TRUE;
}
......@@ -6362,20 +6356,15 @@ int Field_string::store(const char *from,uint length,CHARSET_INFO *cs)
{
ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
uint copy_length;
const char *well_formed_error_pos;
const char *cannot_convert_error_pos;
const char *from_end_pos;
String_copier copier;
/* See the comment for Field_long::store(long long) */
DBUG_ASSERT(!table || table->in_use == current_thd);
copy_length= well_formed_copy_nchars(field_charset,
copy_length= copier.well_formed_copy(field_charset,
(char*) ptr, field_length,
cs, from, length,
field_length / field_charset->mbmaxlen,
&well_formed_error_pos,
&cannot_convert_error_pos,
&from_end_pos);
field_length / field_charset->mbmaxlen);
/* Append spaces if the string was shorter than the field. */
if (copy_length < field_length)
......@@ -6383,11 +6372,7 @@ int Field_string::store(const char *from,uint length,CHARSET_INFO *cs)
field_length-copy_length,
field_charset->pad_char);
if (check_string_copy_error(this, well_formed_error_pos,
cannot_convert_error_pos, from + length, cs))
return 2;
return report_if_important_data(from_end_pos, from + length, FALSE);
return check_conversion_status(&copier, from + length, cs, false);
}
......@@ -6874,29 +6859,19 @@ int Field_varstring::store(const char *from,uint length,CHARSET_INFO *cs)
{
ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
uint copy_length;
const char *well_formed_error_pos;
const char *cannot_convert_error_pos;
const char *from_end_pos;
String_copier copier;
copy_length= well_formed_copy_nchars(field_charset,
copy_length= copier.well_formed_copy(field_charset,
(char*) ptr + length_bytes,
field_length,
cs, from, length,
field_length / field_charset->mbmaxlen,
&well_formed_error_pos,
&cannot_convert_error_pos,
&from_end_pos);
field_length / field_charset->mbmaxlen);
if (length_bytes == 1)
*ptr= (uchar) copy_length;
else
int2store(ptr, copy_length);
if (check_string_copy_error(this, well_formed_error_pos,
cannot_convert_error_pos, from + length, cs))
return 2;
return report_if_important_data(from_end_pos, from + length, TRUE);
return check_conversion_status(&copier, from + length, cs, true);
}
......@@ -7351,9 +7326,8 @@ int Field_blob::store(const char *from,uint length,CHARSET_INFO *cs)
{
ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED;
uint copy_length, new_length;
const char *well_formed_error_pos;
const char *cannot_convert_error_pos;
const char *from_end_pos, *tmp;
String_copier copier;
const char *tmp;
char buff[STRING_BUFFER_USUAL_SIZE];
String tmpstr(buff,sizeof(buff), &my_charset_bin);
......@@ -7401,28 +7375,14 @@ int Field_blob::store(const char *from,uint length,CHARSET_INFO *cs)
bmove(ptr + packlength, (uchar*) &tmp, sizeof(char*));
return 0;
}
/*
"length" is OK as "nchars" argument to well_formed_copy_nchars as this
is never used to limit the length of the data. The cut of long data
is done with the new_length value.
*/
copy_length= well_formed_copy_nchars(field_charset,
copy_length= copier.well_formed_copy(field_charset,
(char*) value.ptr(), new_length,
cs, from, length,
length,
&well_formed_error_pos,
&cannot_convert_error_pos,
&from_end_pos);
cs, from, length);
Field_blob::store_length(copy_length);
tmp= value.ptr();
bmove(ptr+packlength,(uchar*) &tmp,sizeof(char*));
if (check_string_copy_error(this, well_formed_error_pos,
cannot_convert_error_pos, from + length, cs))
return 2;
return report_if_important_data(from_end_pos, from + length, TRUE);
return check_conversion_status(&copier, from + length, cs, true);
oom_error:
/* Fatal OOM error */
......
......@@ -1132,6 +1132,17 @@ class Field_longstr :public Field_str
protected:
int report_if_important_data(const char *ptr, const char *end,
bool count_spaces);
bool check_string_copy_error(const String_copier *copier,
const char *end, CHARSET_INFO *cs);
/*
  Report the outcome of a finished string copy.

  A problem recorded in "copier" is checked first, through
  check_string_copy_error(); when that reports an error the result is 2.
  Otherwise the source range from copier->source_end_pos() up to "end"
  is handed to report_if_important_data() and its result is returned.
*/
int check_conversion_status(const String_copier *copier,
                            const char *end, CHARSET_INFO *cs,
                            bool count_spaces)
{
  return check_string_copy_error(copier, end, cs) ?
         2 :
         report_if_important_data(copier->source_end_pos(),
                                  end, count_spaces);
}
public:
Field_longstr(uchar *ptr_arg, uint32 len_arg, uchar *null_ptr_arg,
uchar null_bit_arg, utype unireg_check_arg,
......
......@@ -2986,9 +2986,7 @@ int select_export::send_data(List<Item> &items)
if (res && !my_charset_same(write_cs, res->charset()) &&
!my_charset_same(write_cs, &my_charset_bin))
{
const char *well_formed_error_pos;
const char *cannot_convert_error_pos;
const char *from_end_pos;
String_copier copier;
const char *error_pos;
uint32 bytes;
uint64 estimated_bytes=
......@@ -3001,16 +2999,11 @@ int select_export::send_data(List<Item> &items)
goto err;
}
bytes= well_formed_copy_nchars(write_cs, (char *) cvt_str.ptr(),
bytes= copier.well_formed_copy(write_cs, (char *) cvt_str.ptr(),
cvt_str.alloced_length(),
res->charset(), res->ptr(), res->length(),
UINT_MAX32, // copy all input chars,
// i.e. ignore nchars parameter
&well_formed_error_pos,
&cannot_convert_error_pos,
&from_end_pos);
error_pos= well_formed_error_pos ? well_formed_error_pos
: cannot_convert_error_pos;
res->charset(),
res->ptr(), res->length());
error_pos= copier.most_important_error_pos();
if (error_pos)
{
char printable_buff[32];
......@@ -3023,7 +3016,7 @@ int select_export::send_data(List<Item> &items)
"string", printable_buff,
item->name, static_cast<long>(row_count));
}
else if (from_end_pos < res->ptr() + res->length())
else if (copier.source_end_pos() < res->ptr() + res->length())
{
/*
result is longer than UINT_MAX32 and doesn't fit into String
......
......@@ -195,16 +195,9 @@ static bool assign_fixed_string(MEM_ROOT *mem_root,
dst_str= (char*) alloc_root(mem_root, dst_len + 1);
if (dst_str)
{
const char* well_formed_error_pos;
const char* cannot_convert_error_pos;
const char* from_end_pos;
dst_copied= well_formed_copy_nchars(dst_cs, dst_str, dst_len,
src_cs, src_str, src_len,
numchars,
& well_formed_error_pos,
& cannot_convert_error_pos,
& from_end_pos);
dst_copied= String_copier().well_formed_copy(dst_cs, dst_str, dst_len,
src_cs, src_str, src_len,
numchars);
DBUG_ASSERT(dst_copied <= dst_len);
dst_len= dst_copied; /* In case the copy truncated the data */
dst_str[dst_copied]= '\0';
......
......@@ -875,41 +875,44 @@ my_copy_with_hex_escaping(CHARSET_INFO *cs,
/*
copy a string,
Copy a string,
with optional character set conversion,
with optional left padding (for binary -> UCS2 conversion)
SYNOPSIS
well_formed_copy_nchars()
to Store result here
to_length Maxinum length of "to" string
to_cs Character set of "to" string
from Copy from here
from_length Length of from string
from_cs From character set
nchars Copy not more that nchars characters
well_formed_error_pos Return position when "from" is not well formed
If there is a Unicode conversion (i.e. to_cs and from_cs are
different character sets and neither is &my_charset_bin), bad input bytes
as well as characters that cannot be encoded in to_cs are replaced with '?'.
In case of non-Unicode copying (i.e. to_cs and from_cs are the same character
set, or from_cs is &my_charset_bin), the function stops on the first bad
byte sequence.
The string that is written to "to" is always well-formed.
@param to The destination string
@param to_length Space available in "to"
@param to_cs Character set of the "to" string
@param from The source string
@param from_length Length of the "from" string
@param from_cs Character set of the "from" string
@param nchars Copy not more than "nchars" characters
The members are set as follows:
m_well_formed_error_pos To the position when "from" is not well formed
or NULL otherwise.
cannot_convert_error_pos Return position where a not convertable
m_cannot_convert_error_pos To the position where a non-convertible
character was met, or NULL otherwise.
from_end_pos Return position where scanning of "from"
m_source_end_pos To the position where scanning of the "from"
string stopped.
NOTES
RETURN
length of bytes copied to 'to'
@returns number of bytes that were written to 'to'
*/
uint32
well_formed_copy_nchars(CHARSET_INFO *to_cs,
char *to, uint to_length,
CHARSET_INFO *from_cs,
const char *from, uint from_length,
uint nchars,
const char **well_formed_error_pos,
const char **cannot_convert_error_pos,
const char **from_end_pos)
uint
String_copier::well_formed_copy(CHARSET_INFO *to_cs,
char *to, uint to_length,
CHARSET_INFO *from_cs,
const char *from, uint from_length,
uint nchars)
{
uint res;
......@@ -920,9 +923,9 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs,
{
if (to_length < to_cs->mbminlen || !nchars)
{
*from_end_pos= from;
*cannot_convert_error_pos= NULL;
*well_formed_error_pos= NULL;
m_source_end_pos= from;
m_cannot_convert_error_pos= NULL;
m_well_formed_error_pos= NULL;
return 0;
}
......@@ -930,9 +933,9 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs,
{
res= MY_MIN(MY_MIN(nchars, to_length), from_length);
memmove(to, from, res);
*from_end_pos= from + res;
*well_formed_error_pos= NULL;
*cannot_convert_error_pos= NULL;
m_source_end_pos= from + res;
m_well_formed_error_pos= NULL;
m_cannot_convert_error_pos= NULL;
}
else
{
......@@ -964,8 +967,8 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs,
&well_formed_error) !=
to_cs->mbminlen)
{
*from_end_pos= *well_formed_error_pos= from;
*cannot_convert_error_pos= NULL;
m_source_end_pos= m_well_formed_error_pos= from;
m_cannot_convert_error_pos= NULL;
return 0;
}
nchars--;
......@@ -979,9 +982,9 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs,
res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
nchars, &well_formed_error);
memmove(to, from, res);
*from_end_pos= from + res;
*well_formed_error_pos= well_formed_error ? from + res : NULL;
*cannot_convert_error_pos= NULL;
m_source_end_pos= from + res;
m_well_formed_error_pos= well_formed_error ? from + res : NULL;
m_cannot_convert_error_pos= NULL;
if (from_offset)
res+= to_cs->mbminlen;
}
......@@ -995,8 +998,8 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs,
const uchar *from_end= (const uchar*) from + from_length;
uchar *to_end= (uchar*) to + to_length;
char *to_start= to;
*well_formed_error_pos= NULL;
*cannot_convert_error_pos= NULL;
m_well_formed_error_pos= NULL;
m_cannot_convert_error_pos= NULL;
for ( ; nchars; nchars--)
{
......@@ -1005,8 +1008,8 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs,
from+= cnvres;
else if (cnvres == MY_CS_ILSEQ)
{
if (!*well_formed_error_pos)
*well_formed_error_pos= from;
if (!m_well_formed_error_pos)
m_well_formed_error_pos= from;
from++;
wc= '?';
}
......@@ -1016,8 +1019,8 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs,
A correct multibyte sequence detected
But it doesn't have Unicode mapping.
*/
if (!*cannot_convert_error_pos)
*cannot_convert_error_pos= from;
if (!m_cannot_convert_error_pos)
m_cannot_convert_error_pos= from;
from+= (-cnvres);
wc= '?';
}
......@@ -1026,8 +1029,8 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs,
if ((uchar *) from >= from_end)
break; // End of line
// Incomplete byte sequence
if (!*well_formed_error_pos)
*well_formed_error_pos= from;
if (!m_well_formed_error_pos)
m_well_formed_error_pos= from;
from++;
wc= '?';
}
......@@ -1036,8 +1039,8 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs,
to+= cnvres;
else if (cnvres == MY_CS_ILUNI && wc != '?')
{
if (!*cannot_convert_error_pos)
*cannot_convert_error_pos= from_prev;
if (!m_cannot_convert_error_pos)
m_cannot_convert_error_pos= from_prev;
wc= '?';
goto outp;
}
......@@ -1047,10 +1050,10 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs,
break;
}
}
*from_end_pos= from;
m_source_end_pos= from;
res= (uint) (to - to_start);
}
return (uint32) res;
return res;
}
......
......@@ -41,14 +41,70 @@ inline uint32 copy_and_convert(char *to, uint32 to_length,
{
return my_convert(to, to_length, to_cs, from, from_length, from_cs, errors);
}
uint32 well_formed_copy_nchars(CHARSET_INFO *to_cs,
char *to, uint to_length,
CHARSET_INFO *from_cs,
const char *from, uint from_length,
uint nchars,
const char **well_formed_error_pos,
const char **cannot_convert_error_pos,
const char **from_end_pos);
/*
  Copies a string from one character set to another and remembers
  the status of the copy:
  - where scanning of the source string stopped,
  - where the first not well formed byte sequence was met,
  - where the first character that cannot be converted was met.

  The status members are written by well_formed_copy(). A newly
  constructed object reports a null pointer from every accessor, so
  the status can be read safely even before the first copy.
*/
class String_copier
{
  const char *m_source_end_pos;
  const char *m_well_formed_error_pos;
  const char *m_cannot_convert_error_pos;
public:
  /* Start with "nothing scanned, no errors": all positions are null. */
  String_copier()
   :m_source_end_pos(0),
    m_well_formed_error_pos(0),
    m_cannot_convert_error_pos(0)
  { }
  /* Position in the source where scanning stopped. */
  const char *source_end_pos() const
  { return m_source_end_pos; }
  /* Position of the first not well formed sequence, or null. */
  const char *well_formed_error_pos() const
  { return m_well_formed_error_pos; }
  /* Position of the first character that cannot be converted, or null. */
  const char *cannot_convert_error_pos() const
  { return m_cannot_convert_error_pos; }
  /*
    The well-formedness error position if there is one,
    otherwise the cannot-convert error position (which may be null).
  */
  const char *most_important_error_pos() const
  {
    return well_formed_error_pos() ? well_formed_error_pos() :
                                     cannot_convert_error_pos();
  }
  /*
    Copy a string. Fix bad bytes/characters on Unicode conversion,
    break on bad bytes in case of non-Unicode copying.
  */
  uint well_formed_copy(CHARSET_INFO *to_cs, char *to, uint to_length,
                        CHARSET_INFO *from_cs, const char *from,
                        uint from_length, uint nchars);
  // Same as above, but without the "nchars" limit.
  uint well_formed_copy(CHARSET_INFO *to_cs, char *to, uint to_length,
                        CHARSET_INFO *from_cs, const char *from,
                        uint from_length)
  {
    return well_formed_copy(to_cs, to, to_length,
                            from_cs, from, from_length,
                            from_length /* No limit on "nchars"*/);
  }
  /*
    Copy a string. If a bad byte sequence is found in case of non-Unicode
    copying, continues processing and replaces bad bytes to '?'.

    @returns number of bytes written to "to"
  */
  uint copy_fix(CHARSET_INFO *to_cs, char *to, uint to_length,
                CHARSET_INFO *from_cs, const char *from, uint from_length)
  {
    /* First pass: the overload without the "nchars" limit. */
    uint length= well_formed_copy(to_cs, to, to_length,
                                  from_cs, from, from_length);
    const char *from_end= from + from_length;
    if (well_formed_error_pos() && source_end_pos() < from_end)
    {
      /*
        There was an error and there are still some bytes in the source string.
        This is possible if there were no character set conversion and a
        malformed byte sequence was found. Copy the rest and replace bad
        bytes to '?'. Note: m_source_end_pos is not updated!!!
      */
      uint dummy_errors;
      length+= copy_and_convert(to + length, to_length - length, to_cs,
                                source_end_pos(),
                                (uint) (from_end - source_end_pos()),
                                from_cs, &dummy_errors);
    }
    return length;
  }
};
size_t my_copy_with_hex_escaping(CHARSET_INFO *cs,
char *dst, size_t dstlen,
const char *src, size_t srclen);
......
......@@ -61,8 +61,7 @@ bool parse_length_encoded_string(const char **ptr,
uint nchars_max)
{
ulong copy_length, data_length;
const char *well_formed_error_pos= NULL, *cannot_convert_error_pos= NULL,
*from_end_pos= NULL;
String_copier copier;
copy_length= data_length= net_field_length((uchar **) ptr);
......@@ -73,11 +72,8 @@ bool parse_length_encoded_string(const char **ptr,
if (*ptr - start_ptr + data_length > input_length)
return true;
copy_length= well_formed_copy_nchars(&my_charset_utf8_bin, dest, dest_size,
from_cs, *ptr, data_length, nchars_max,
&well_formed_error_pos,
&cannot_convert_error_pos,
&from_end_pos);
copy_length= copier.well_formed_copy(&my_charset_utf8_bin, dest, dest_size,
from_cs, *ptr, data_length, nchars_max);
*copied_len= copy_length;
(*ptr)+= data_length;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment