Commit e2da748c authored by Alexander Barkov

MDEV-28835 Assertion `(length % 4) == 0' failed in my_lengthsp_utf32 on INSERT

Problem:

Item_func_date_format::val_str() and make_date_time() did not take into
account that the format string and the result string
(separately or at the same time) can be of a tricky character set
like UCS2, UTF16, UTF32. As a result, DATE_FORMAT() could generate
an ill-formed result which crashed on DBUG_ASSERTs testing well-formedness
in other parts of the code.

Fix:

1. class String changes
   Removing String::append_with_prefill(). It was not compatible with
   tricky character sets. Also it was inconvenient to use and required
   too much duplicate code on the caller side.
   Adding String::append_zerofill() instead. It's compatible with tricky
   character sets and is easier to use.
   Adding helper methods Static_binary_string::q_append_wc() and
   String::append_wc(), to append a single wide character
   (a Unicode code point in my_wc_t).

2. storage/spider changes
   Removing spider_string::append_with_prefill().
   It used String::append_with_prefill() inside, but it was unused itself.

3. Changing tricky charset incompatible code pieces in make_date_time()
   to compatible replacements:

   - Fixing the loop scanning the format string to iterate in terms
     of Unicode code points (using mb_wc()) rather than in terms
     of "char" items.
   - Using append_wc(my_wc_t) instead of append(char) to append
     a single character to the result string.
   - Using append_zerofill() instead of append_with_prefill() to
     append date/time numeric components to the result string.
parent 3626379d
......@@ -3394,5 +3394,49 @@ INSERT INTO t VALUES (0,0);
DELETE FROM t WHERE c2<c1;
DROP TABLE t;
#
# MDEV-28835 Assertion `(length % 4) == 0' failed in my_lengthsp_utf32 on INSERT
#
SET NAMES latin1,character_set_connection=binary;
# Binary format, binary result
SELECT DATE_FORMAT('2004-02-02','%W');
DATE_FORMAT('2004-02-02','%W')
Monday
SELECT HEX(DATE_FORMAT('2004-02-02','%W'));
HEX(DATE_FORMAT('2004-02-02','%W'))
4D6F6E646179
SELECT DATE_FORMAT(TIME'-01:01:01','%h');
DATE_FORMAT(TIME'-01:01:01','%h')
-01
SELECT HEX(DATE_FORMAT(TIME'-01:01:01','%h'));
HEX(DATE_FORMAT(TIME'-01:01:01','%h'))
2D3031
# latin1 format, binary result
SELECT DATE_FORMAT('2004-02-02',_latin1'%W');
DATE_FORMAT('2004-02-02',_latin1'%W')
Monday
SELECT HEX(DATE_FORMAT('2004-02-02',_latin1'%W'));
HEX(DATE_FORMAT('2004-02-02',_latin1'%W'))
4D6F6E646179
SELECT DATE_FORMAT(TIME'-01:01:01',_latin1'%h');
DATE_FORMAT(TIME'-01:01:01',_latin1'%h')
-01
SELECT HEX(DATE_FORMAT(TIME'-01:01:01',_latin1'%h'));
HEX(DATE_FORMAT(TIME'-01:01:01',_latin1'%h'))
2D3031
# Binary format, latin1 result
SET NAMES latin1;
SELECT DATE_FORMAT('2004-02-02',_binary'%W');
DATE_FORMAT('2004-02-02',_binary'%W')
Monday
SELECT HEX(DATE_FORMAT('2004-02-02',_binary'%W'));
HEX(DATE_FORMAT('2004-02-02',_binary'%W'))
4D6F6E646179
SELECT DATE_FORMAT(TIME'-01:01:01',_binary'%h');
DATE_FORMAT(TIME'-01:01:01',_binary'%h')
-01
SELECT HEX(DATE_FORMAT(TIME'-01:01:01',_binary'%h'));
HEX(DATE_FORMAT(TIME'-01:01:01',_binary'%h'))
2D3031
#
# End of 10.4 tests
#
......@@ -241,6 +241,31 @@ INSERT INTO t VALUES (0,0);
DELETE FROM t WHERE c2<c1;
DROP TABLE t;
--echo #
--echo # MDEV-28835 Assertion `(length % 4) == 0' failed in my_lengthsp_utf32 on INSERT
--echo #
# With character_set_connection=binary, literals written without an
# introducer are the "binary format" case named in the echo lines below.
SET NAMES latin1,character_set_connection=binary;
--echo # Binary format, binary result
# Each case is run twice: once as text and once through HEX() so that the
# exact bytes produced by DATE_FORMAT() are visible in the result file.
# '%W' exercises a weekday-name specifier; TIME'-01:01:01' with '%h'
# exercises the leading '-' appended for negative time values.
SELECT DATE_FORMAT('2004-02-02','%W');
SELECT HEX(DATE_FORMAT('2004-02-02','%W'));
SELECT DATE_FORMAT(TIME'-01:01:01','%h');
SELECT HEX(DATE_FORMAT(TIME'-01:01:01','%h'));
--echo # latin1 format, binary result
# Same statements, but the format string carries an explicit _latin1 introducer.
SELECT DATE_FORMAT('2004-02-02',_latin1'%W');
SELECT HEX(DATE_FORMAT('2004-02-02',_latin1'%W'));
SELECT DATE_FORMAT(TIME'-01:01:01',_latin1'%h');
SELECT HEX(DATE_FORMAT(TIME'-01:01:01',_latin1'%h'));
--echo # Binary format, latin1 result
# Switch the connection back to latin1 and force the format string to binary instead.
SET NAMES latin1;
SELECT DATE_FORMAT('2004-02-02',_binary'%W');
SELECT HEX(DATE_FORMAT('2004-02-02',_binary'%W'));
SELECT DATE_FORMAT(TIME'-01:01:01',_binary'%h');
SELECT HEX(DATE_FORMAT(TIME'-01:01:01',_binary'%h'));
--echo #
--echo # End of 10.4 tests
--echo #
......@@ -2958,5 +2958,69 @@ HEX(OCT(a))
DROP TABLE t;
SET NAMES utf8;
#
# MDEV-28835 Assertion `(length % 4) == 0' failed in my_lengthsp_utf32 on INSERT
#
SET sql_mode='',character_set_connection=utf32;
CREATE TABLE t (c ENUM ('','')) CHARACTER SET utf32;
Warnings:
Note 1291 Column 'c' has duplicated value '' in ENUM
INSERT INTO t VALUES (DATE_FORMAT('2004-02-02','%W'));
Warnings:
Warning 1265 Data truncated for column 'c' at row 1
DROP TABLE t;
SET sql_mode=DEFAULT;
# utf32 format, utf32 result
SELECT DATE_FORMAT('2004-02-02','%W');
DATE_FORMAT('2004-02-02','%W')
Monday
SELECT HEX(DATE_FORMAT('2004-02-02','%W'));
HEX(DATE_FORMAT('2004-02-02','%W'))
0000004D0000006F0000006E000000640000006100000079
SELECT DATE_FORMAT(TIME'-01:01:01','%h');
DATE_FORMAT(TIME'-01:01:01','%h')
-01
SELECT HEX(DATE_FORMAT(TIME'-01:01:01','%h'));
HEX(DATE_FORMAT(TIME'-01:01:01','%h'))
0000002D0000003000000031
# utf8 format, utf32 result
SELECT DATE_FORMAT('2004-02-02',_utf8'%W');
DATE_FORMAT('2004-02-02',_utf8'%W')
Monday
SELECT HEX(DATE_FORMAT('2004-02-02',_utf8'%W'));
HEX(DATE_FORMAT('2004-02-02',_utf8'%W'))
0000004D0000006F0000006E000000640000006100000079
SELECT DATE_FORMAT(TIME'-01:01:01',_utf8'%h');
DATE_FORMAT(TIME'-01:01:01',_utf8'%h')
-01
SELECT HEX(DATE_FORMAT(TIME'-01:01:01',_utf8'%h'));
HEX(DATE_FORMAT(TIME'-01:01:01',_utf8'%h'))
0000002D0000003000000031
# utf32 format, utf8 result
SET NAMES utf8;
SELECT DATE_FORMAT('2004-02-02',CONVERT('%W' USING utf32));
DATE_FORMAT('2004-02-02',CONVERT('%W' USING utf32))
Monday
SELECT HEX(DATE_FORMAT('2004-02-02',CONVERT('%W' USING utf32)));
HEX(DATE_FORMAT('2004-02-02',CONVERT('%W' USING utf32)))
4D6F6E646179
SELECT DATE_FORMAT(TIME'-01:01:01',CONVERT('%h' USING utf32));
DATE_FORMAT(TIME'-01:01:01',CONVERT('%h' USING utf32))
-01
SELECT HEX(DATE_FORMAT(TIME'-01:01:01',CONVERT('%h' USING utf32)));
HEX(DATE_FORMAT(TIME'-01:01:01',CONVERT('%h' USING utf32)))
2D3031
# non-BMP characters in format, utf8mb3 result
# expect non-convertable characters to be replaced to '?'
SET NAMES utf8mb3;
SET @format= CONCAT(CONVERT('%h' USING utf32),
_utf32 0x0010FFFF /*a non-BMP character*/,
CONVERT('%i' USING utf32));
SELECT DATE_FORMAT(TIME'11:22:33',@format);
DATE_FORMAT(TIME'11:22:33',@format)
11?22
SELECT HEX(DATE_FORMAT(TIME'11:22:33',@format));
HEX(DATE_FORMAT(TIME'11:22:33',@format))
31313F3232
#
# End of 10.4 tests
#
......@@ -1120,6 +1120,48 @@ SELECT HEX(OCT(a)) FROM t;
DROP TABLE t;
SET NAMES utf8;
--echo #
--echo # MDEV-28835 Assertion `(length % 4) == 0' failed in my_lengthsp_utf32 on INSERT
--echo #
# --view-protocol does not yet work well with character set introducers
--disable_view_protocol
# Statements from the bug title: a DATE_FORMAT() result is INSERTed into a
# utf32 ENUM column while the connection character set is utf32.
# NOTE(review): sql_mode='' presumably lets the INSERT finish with a
# truncation warning instead of an error - confirm.
SET sql_mode='',character_set_connection=utf32;
CREATE TABLE t (c ENUM ('','')) CHARACTER SET utf32;
INSERT INTO t VALUES (DATE_FORMAT('2004-02-02','%W'));
DROP TABLE t;
SET sql_mode=DEFAULT;
--echo # utf32 format, utf32 result
# Each case is run as text and through HEX(); the HEX() form shows whether
# every character of the result is a complete 4-byte utf32 unit.
SELECT DATE_FORMAT('2004-02-02','%W');
SELECT HEX(DATE_FORMAT('2004-02-02','%W'));
SELECT DATE_FORMAT(TIME'-01:01:01','%h');
SELECT HEX(DATE_FORMAT(TIME'-01:01:01','%h'));
--echo # utf8 format, utf32 result
# The format string is forced to utf8 with an introducer; the connection is still utf32.
SELECT DATE_FORMAT('2004-02-02',_utf8'%W');
SELECT HEX(DATE_FORMAT('2004-02-02',_utf8'%W'));
SELECT DATE_FORMAT(TIME'-01:01:01',_utf8'%h');
SELECT HEX(DATE_FORMAT(TIME'-01:01:01',_utf8'%h'));
--echo # utf32 format, utf8 result
# The opposite mix: utf8 connection, format string converted to utf32.
SET NAMES utf8;
SELECT DATE_FORMAT('2004-02-02',CONVERT('%W' USING utf32));
SELECT HEX(DATE_FORMAT('2004-02-02',CONVERT('%W' USING utf32)));
SELECT DATE_FORMAT(TIME'-01:01:01',CONVERT('%h' USING utf32));
SELECT HEX(DATE_FORMAT(TIME'-01:01:01',CONVERT('%h' USING utf32)));
--echo # non-BMP characters in format, utf8mb3 result
--echo # expect non-convertable characters to be replaced to '?'
SET NAMES utf8mb3;
# The format is '%h', then the single code point U+10FFFF, then '%i'.
SET @format= CONCAT(CONVERT('%h' USING utf32),
_utf32 0x0010FFFF /*a non-BMP character*/,
CONVERT('%i' USING utf32));
SELECT DATE_FORMAT(TIME'11:22:33',@format);
SELECT HEX(DATE_FORMAT(TIME'11:22:33',@format));
--enable_view_protocol
--echo #
--echo # End of 10.4 tests
......
......@@ -478,7 +478,9 @@ static bool extract_date_time(THD *thd, DATE_TIME_FORMAT *format,
Create a formatted date/time value in a string.
*/
static bool make_date_time(const LEX_CSTRING &format, MYSQL_TIME *l_time,
static bool make_date_time(const LEX_CSTRING &format,
CHARSET_INFO *format_charset,
MYSQL_TIME *l_time,
timestamp_type type, const MY_LOCALE *locale,
String *str)
{
......@@ -486,21 +488,33 @@ static bool make_date_time(const LEX_CSTRING &format, MYSQL_TIME *l_time,
uint hours_i;
uint weekday;
ulong length;
const char *ptr, *end;
const uchar *ptr, *end;
str->length(0);
if (l_time->neg)
str->append('-');
str->append_wc('-');
end= (ptr= format.str) + format.length;
for (; ptr != end ; ptr++)
end= (ptr= (const uchar *) format.str) + format.length;
for ( ; ; )
{
if (*ptr != '%' || ptr+1 == end)
str->append(*ptr);
my_wc_t wc;
int mblen= format_charset->cset->mb_wc(format_charset, &wc, ptr, end);
if (mblen < 1)
return false;
ptr+= mblen;
if (wc != '%' || ptr >= end)
str->append_wc(wc);
else
{
switch (*++ptr) {
mblen= format_charset->cset->mb_wc(format_charset, &wc, ptr, end);
if (mblen < 1)
return false;
ptr+= mblen;
switch (wc) {
case 'M':
if (type == MYSQL_TIMESTAMP_TIME || !l_time->month)
return 1;
......@@ -536,8 +550,7 @@ static bool make_date_time(const LEX_CSTRING &format, MYSQL_TIME *l_time,
case 'D':
if (type == MYSQL_TIMESTAMP_TIME)
return 1;
length= (uint) (int10_to_str(l_time->day, intbuff, 10) - intbuff);
str->append_with_prefill(intbuff, length, 1, '0');
str->append_zerofill(l_time->day, 1);
if (l_time->day >= 10 && l_time->day <= 19)
str->append(STRING_WITH_LEN("th"));
else
......@@ -561,73 +574,62 @@ static bool make_date_time(const LEX_CSTRING &format, MYSQL_TIME *l_time,
case 'Y':
if (type == MYSQL_TIMESTAMP_TIME)
return 1;
length= (uint) (int10_to_str(l_time->year, intbuff, 10) - intbuff);
str->append_with_prefill(intbuff, length, 4, '0');
str->append_zerofill(l_time->year, 4);
break;
case 'y':
if (type == MYSQL_TIMESTAMP_TIME)
return 1;
length= (uint) (int10_to_str(l_time->year%100, intbuff, 10) - intbuff);
str->append_with_prefill(intbuff, length, 2, '0');
str->append_zerofill(l_time->year % 100, 2);
break;
case 'm':
if (type == MYSQL_TIMESTAMP_TIME)
return 1;
length= (uint) (int10_to_str(l_time->month, intbuff, 10) - intbuff);
str->append_with_prefill(intbuff, length, 2, '0');
str->append_zerofill(l_time->month, 2);
break;
case 'c':
if (type == MYSQL_TIMESTAMP_TIME)
return 1;
length= (uint) (int10_to_str(l_time->month, intbuff, 10) - intbuff);
str->append_with_prefill(intbuff, length, 1, '0');
str->append_zerofill(l_time->month, 1);
break;
case 'd':
if (type == MYSQL_TIMESTAMP_TIME)
return 1;
length= (uint) (int10_to_str(l_time->day, intbuff, 10) - intbuff);
str->append_with_prefill(intbuff, length, 2, '0');
str->append_zerofill(l_time->day, 2);
break;
case 'e':
if (type == MYSQL_TIMESTAMP_TIME)
return 1;
length= (uint) (int10_to_str(l_time->day, intbuff, 10) - intbuff);
str->append_with_prefill(intbuff, length, 1, '0');
str->append_zerofill(l_time->day, 1);
break;
case 'f':
length= (uint) (int10_to_str(l_time->second_part, intbuff, 10) - intbuff);
str->append_with_prefill(intbuff, length, 6, '0');
str->append_zerofill((uint) l_time->second_part, 6);
break;
case 'H':
length= (uint) (int10_to_str(l_time->hour, intbuff, 10) - intbuff);
str->append_with_prefill(intbuff, length, 2, '0');
str->append_zerofill(l_time->hour, 2);
break;
case 'h':
case 'I':
hours_i= (l_time->hour%24 + 11)%12+1;
length= (uint) (int10_to_str(hours_i, intbuff, 10) - intbuff);
str->append_with_prefill(intbuff, length, 2, '0');
str->append_zerofill(hours_i, 2);
break;
case 'i': /* minutes */
length= (uint) (int10_to_str(l_time->minute, intbuff, 10) - intbuff);
str->append_with_prefill(intbuff, length, 2, '0');
str->append_zerofill(l_time->minute, 2);
break;
case 'j':
{
if (type == MYSQL_TIMESTAMP_TIME || !l_time->month || !l_time->year)
return 1;
length= (uint) (int10_to_str(calc_daynr(l_time->year,l_time->month,
l_time->day) -
calc_daynr(l_time->year,1,1) + 1, intbuff, 10) - intbuff);
str->append_with_prefill(intbuff, length, 3, '0');
long value= calc_daynr(l_time->year,l_time->month, l_time->day) -
calc_daynr(l_time->year,1,1) + 1;
str->append_zerofill((uint) value, 3);
break;
}
case 'k':
length= (uint) (int10_to_str(l_time->hour, intbuff, 10) - intbuff);
str->append_with_prefill(intbuff, length, 1, '0');
str->append_zerofill(l_time->hour, 1);
break;
case 'l':
hours_i= (l_time->hour%24 + 11)%12+1;
length= (uint) (int10_to_str(hours_i, intbuff, 10) - intbuff);
str->append_with_prefill(intbuff, length, 1, '0');
str->append_zerofill(hours_i, 1);
break;
case 'p':
hours_i= l_time->hour%24;
......@@ -643,8 +645,7 @@ static bool make_date_time(const LEX_CSTRING &format, MYSQL_TIME *l_time,
break;
case 'S':
case 's':
length= (uint) (int10_to_str(l_time->second, intbuff, 10) - intbuff);
str->append_with_prefill(intbuff, length, 2, '0');
str->append_zerofill(l_time->second, 2);
break;
case 'T':
length= sprintf(intbuff, "%02d:%02d:%02d",
......@@ -657,42 +658,39 @@ static bool make_date_time(const LEX_CSTRING &format, MYSQL_TIME *l_time,
uint year;
if (type == MYSQL_TIMESTAMP_TIME)
return 1;
length= (uint) (int10_to_str(calc_week(l_time,
(*ptr) == 'U' ?
WEEK_FIRST_WEEKDAY : WEEK_MONDAY_FIRST,
&year),
intbuff, 10) - intbuff);
str->append_with_prefill(intbuff, length, 2, '0');
uint value= calc_week(l_time,
wc == 'U' ? WEEK_FIRST_WEEKDAY :
WEEK_MONDAY_FIRST,
&year);
str->append_zerofill(value, 2);
}
break;
case 'v':
case 'V':
{
uint year;
if (type == MYSQL_TIMESTAMP_TIME)
return 1;
length= (uint) (int10_to_str(calc_week(l_time,
((*ptr) == 'V' ?
(WEEK_YEAR | WEEK_FIRST_WEEKDAY) :
(WEEK_YEAR | WEEK_MONDAY_FIRST)),
&year),
intbuff, 10) - intbuff);
str->append_with_prefill(intbuff, length, 2, '0');
uint year;
if (type == MYSQL_TIMESTAMP_TIME)
return 1;
uint value= calc_week(l_time, wc == 'V' ?
(WEEK_YEAR | WEEK_FIRST_WEEKDAY) :
(WEEK_YEAR | WEEK_MONDAY_FIRST),
&year);
str->append_zerofill(value, 2);
}
break;
case 'x':
case 'X':
{
uint year;
if (type == MYSQL_TIMESTAMP_TIME)
return 1;
(void) calc_week(l_time,
((*ptr) == 'X' ?
WEEK_YEAR | WEEK_FIRST_WEEKDAY :
WEEK_YEAR | WEEK_MONDAY_FIRST),
&year);
length= (uint) (int10_to_str(year, intbuff, 10) - intbuff);
str->append_with_prefill(intbuff, length, 4, '0');
uint year;
if (type == MYSQL_TIMESTAMP_TIME)
return 1;
(void) calc_week(l_time,
(wc == 'X' ?
WEEK_YEAR | WEEK_FIRST_WEEKDAY :
WEEK_YEAR | WEEK_MONDAY_FIRST),
&year);
str->append_zerofill(year, 4);
}
break;
case 'w':
......@@ -700,12 +698,11 @@ static bool make_date_time(const LEX_CSTRING &format, MYSQL_TIME *l_time,
return 1;
weekday=calc_weekday(calc_daynr(l_time->year,l_time->month,
l_time->day),1);
length= (uint) (int10_to_str(weekday, intbuff, 10) - intbuff);
str->append_with_prefill(intbuff, length, 1, '0');
str->append_zerofill(weekday, 1);
break;
default:
str->append(*ptr);
str->append_wc(wc);
break;
}
}
......@@ -1919,7 +1916,7 @@ String *Item_func_date_format::val_str(String *str)
/* Create the result string */
str->set_charset(collation.collation);
if (!make_date_time(format->lex_cstring(), &l_time,
if (!make_date_time(format->lex_cstring(), format->charset(), &l_time,
is_time_format ? MYSQL_TIMESTAMP_TIME :
MYSQL_TIMESTAMP_DATE,
lc, str))
......
......@@ -649,24 +649,6 @@ bool String::append_parenthesized(long nr, int radix)
}
bool String::append_with_prefill(const char *s,uint32 arg_length,
uint32 full_length, char fill_char)
{
int t_length= arg_length > full_length ? arg_length : full_length;
if (realloc_with_extra_if_needed(str_length + t_length))
return TRUE;
t_length= full_length - arg_length;
if (t_length > 0)
{
bfill(Ptr+str_length, t_length, fill_char);
str_length=str_length + t_length;
}
append(s, arg_length);
return FALSE;
}
int Static_binary_string::strstr(const Static_binary_string &s, uint32 offset)
{
if (s.length()+offset <= str_length)
......
......@@ -392,6 +392,19 @@ class Static_binary_string : public Sql_alloc
float8store(Ptr + str_length, *d);
str_length += 8;
}
/*
  Append one wide character, encoded according to "cs", at end().
  Nothing is allocated here: the caller must have made at least
  cs->mbmaxlen bytes available past end().

  Returns the value returned by cs->cset->wc_mb(). str_length is advanced
  by that value only when it is positive; otherwise the string is unchanged.
*/
int q_append_wc(my_wc_t wc, CHARSET_INFO *cs)
{
  uchar *dst= (uchar *) end();
  const int mblen= cs->cset->wc_mb(cs, wc, dst, dst + cs->mbmaxlen);
  if (mblen > 0)
    str_length+= (uint32) mblen;
  return mblen;
}
void q_append(const char *data, size_t data_len)
{
if (data_len)
......@@ -1009,8 +1022,6 @@ class String: public Charset, public Binary_string
(quot && append(quot));
}
bool append(const char *s, size_t size);
bool append_with_prefill(const char *s, uint32 arg_length,
uint32 full_length, char fill_char);
bool append_parenthesized(long nr, int radix= 10);
// Append with optional character set conversion from cs to charset()
......@@ -1020,6 +1031,31 @@ class String: public Charset, public Binary_string
return append(s.str, s.length, cs);
}
/*
  Append a single wide character, encoded in charset().
  Space for mbmaxlen() bytes is reserved before encoding.

  If encoding fails with MY_CS_ILUNI and the character is not already '?',
  a '?' is appended in its place.

  Returns false on success, true if reserve() failed or nothing could be
  appended.
*/
bool append_wc(my_wc_t wc)
{
  if (reserve(mbmaxlen()))
    return true;

  const int mblen= q_append_wc(wc, charset());
  if (mblen > 0)
    return false;

  /* Only an MY_CS_ILUNI failure on something other than '?' gets a substitute */
  if (mblen != MY_CS_ILUNI || wc == '?')
    return true;

  return q_append_wc('?', charset()) <= 0;
}
/*
  Append an unsigned number in decimal notation, left-padded with '0'
  characters up to "width" characters. A number that already needs "width"
  or more digits is appended without padding.

  Both the padding and the digits are passed to
  append(ptr, length, &my_charset_latin1), the append() variant that takes
  the character set of the source bytes.

  @param num    the value to append
  @param width  the minimum number of characters to produce
  @return       false on success, true if an append() call reported failure
*/
bool append_zerofill(uint num, uint width)
{
  static const char zeros[15]= "00000000000000";
  const uint max_chunk= (uint) sizeof(zeros) - 1;  // without the trailing '\0'
  char intbuff[15];
  uint length= (uint) (int10_to_str(num, intbuff, 10) - intbuff);
  /*
    Pad in pieces no longer than zeros[]: a single
    append(zeros, width - length, ...) would read past the end of zeros[]
    whenever more than max_chunk fill characters are required.
  */
  for (uint pad= length < width ? width - length : 0; pad > 0; )
  {
    const uint chunk= pad < max_chunk ? pad : max_chunk;
    if (append(zeros, chunk, &my_charset_latin1))
      return true;
    pad-= chunk;
  }
  return append(intbuff, length, &my_charset_latin1);
}
/*
Append a bitmask in an uint32 with a translation into a
C-style human readable representation, e.g.:
......
......@@ -445,12 +445,6 @@ class spider_string
IO_CACHE *file,
uint32 arg_length
);
bool append_with_prefill(
const char *s,
uint32 arg_length,
uint32 full_length,
char fill_char
);
int strstr(
const String &search,
uint32 offset = 0
......
......@@ -941,23 +941,6 @@ bool spider_string::append(
DBUG_RETURN(res);
}
/*
  Wrapper around str.append_with_prefill(): forwards all four arguments
  unchanged and returns the wrapped call's result.

  NOTE(review): the DBUG_* and SPIDER_STRING_CALC_MEM macros are defined
  outside this view; presumably they provide debug tracing and memory
  accounting for the wrapped String - confirm against their definitions.
*/
bool spider_string::append_with_prefill(
const char *s,
uint32 arg_length,
uint32 full_length,
char fill_char
) {
DBUG_ENTER("spider_string::append_with_prefill");
DBUG_PRINT("info",("spider this=%p", this));
DBUG_ASSERT(mem_calc_inited);
/*
  Either no memory is tracked and str owns no buffer, or the tracked amount
  equals str's allocated length.
*/
DBUG_ASSERT((!current_alloc_mem && !str.is_alloced()) ||
current_alloc_mem == str.alloced_length());
bool res = str.append_with_prefill(s, arg_length, full_length,
fill_char);
/* NOTE(review): presumably refreshes the tracked size after str may have grown - confirm */
SPIDER_STRING_CALC_MEM;
DBUG_RETURN(res);
}
int spider_string::strstr(
const String &search,
uint32 offset
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment