Commit e463ee94 authored by unknown's avatar unknown

Bug#20404: SHOW CREATE TABLE fails with Turkish I

  
  Problem: SHOW CREATE TABLE printed garbage in table
  name for tables having TURKISH I
  (i.e. LATIN CAPITAL LETTER I WITH DOT ABOVE)
  when lower_case_table_names=1.
  
  Reason: In some cases during lower/upper conversion in utf8,
  the result string can be shorter than the original string
  (including the above letter). Old implementation of caseup_str()
  and casedn_str() didn't handle the result length properly,
  assuming that length cannot change.
  
  This fix changes the result type of cs->cset->casedn_str()
  and cs->cset->caseup_str() from VOID to UINT, to return
  the result length, as well as put '\0' terminator on a 
  proper place.
  
  Also, my_caseup_str_utf8() and my_casedn_str_utf8() were 
  rewritten not to use strlen() for performance purposes.
  This was done by adding two new functions - my_utf8_uni_no_range()
  and my_uni_utf8_no_range() - for null-terminated strings.



include/m_ctype.h:
  Changing return type from void to uint for caseup_str() and casedn_str()
mysql-test/r/lowercase_table.result:
  Adding test case
mysql-test/t/lowercase_table.test:
  Adding test case
sql/sql_parse.cc:
  Set table->table.length to result of my_casedn_str().
strings/ctype-bin.c:
  Changing return type from void to uint for caseup_str() and casedn_str()
strings/ctype-mb.c:
  Changing return type from void to uint for caseup_str() and casedn_str()
strings/ctype-simple.c:
  Changing return type from void to uint for caseup_str() and casedn_str()
strings/ctype-ucs2.c:
  Changing return type from void to uint for caseup_str() and casedn_str()
strings/ctype-utf8.c:
  Changing return type from void to uint for caseup_str() and casedn_str().
      Optimization, to get rid of strlen():
      Adding my_utf8_uni_no_range() and my_uni_utf8_no_range() - for null
      terminated strings.
parent 4453dc48
...@@ -178,8 +178,8 @@ typedef struct my_charset_handler_st ...@@ -178,8 +178,8 @@ typedef struct my_charset_handler_st
unsigned char *s,unsigned char *e); unsigned char *s,unsigned char *e);
/* Functions for case and sort convertion */ /* Functions for case and sort convertion */
void (*caseup_str)(struct charset_info_st *, char *); uint (*caseup_str)(struct charset_info_st *, char *);
void (*casedn_str)(struct charset_info_st *, char *); uint (*casedn_str)(struct charset_info_st *, char *);
uint (*caseup)(struct charset_info_st *, char *src, uint srclen, uint (*caseup)(struct charset_info_st *, char *src, uint srclen,
char *dst, uint dstlen); char *dst, uint dstlen);
uint (*casedn)(struct charset_info_st *, char *src, uint srclen, uint (*casedn)(struct charset_info_st *, char *src, uint srclen,
...@@ -311,8 +311,8 @@ extern uint my_instr_simple(struct charset_info_st *, ...@@ -311,8 +311,8 @@ extern uint my_instr_simple(struct charset_info_st *,
/* Functions for 8bit */ /* Functions for 8bit */
extern void my_caseup_str_8bit(CHARSET_INFO *, char *); extern uint my_caseup_str_8bit(CHARSET_INFO *, char *);
extern void my_casedn_str_8bit(CHARSET_INFO *, char *); extern uint my_casedn_str_8bit(CHARSET_INFO *, char *);
extern uint my_caseup_8bit(CHARSET_INFO *, char *src, uint srclen, extern uint my_caseup_8bit(CHARSET_INFO *, char *src, uint srclen,
char *dst, uint dstlen); char *dst, uint dstlen);
extern uint my_casedn_8bit(CHARSET_INFO *, char *src, uint srclen, extern uint my_casedn_8bit(CHARSET_INFO *, char *src, uint srclen,
...@@ -399,8 +399,8 @@ int my_mbcharlen_8bit(CHARSET_INFO *, uint c); ...@@ -399,8 +399,8 @@ int my_mbcharlen_8bit(CHARSET_INFO *, uint c);
/* Functions for multibyte charsets */ /* Functions for multibyte charsets */
extern void my_caseup_str_mb(CHARSET_INFO *, char *); extern uint my_caseup_str_mb(CHARSET_INFO *, char *);
extern void my_casedn_str_mb(CHARSET_INFO *, char *); extern uint my_casedn_str_mb(CHARSET_INFO *, char *);
extern uint my_caseup_mb(CHARSET_INFO *, char *src, uint srclen, extern uint my_caseup_mb(CHARSET_INFO *, char *src, uint srclen,
char *dst, uint dstlen); char *dst, uint dstlen);
extern uint my_casedn_mb(CHARSET_INFO *, char *src, uint srclen, extern uint my_casedn_mb(CHARSET_INFO *, char *src, uint srclen,
......
...@@ -84,3 +84,27 @@ create table t2 like T1; ...@@ -84,3 +84,27 @@ create table t2 like T1;
drop table t1, t2; drop table t1, t2;
show tables; show tables;
Tables_in_test Tables_in_test
set names utf8;
drop table if exists İ,İİ;
create table İ (s1 int);
show create table İ;
Table Create Table
İ CREATE TABLE `i` (
`s1` int(11) default NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
show tables;
Tables_in_test
i
drop table İ;
create table İİ (s1 int);
show create table İİ;
Table Create Table
İİ CREATE TABLE `ii` (
`s1` int(11) default NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
show tables;
Tables_in_test
ii
drop table İİ;
set names latin1;
End of 5.0 tests
...@@ -85,3 +85,23 @@ drop table t1, t2; ...@@ -85,3 +85,23 @@ drop table t1, t2;
show tables; show tables;
# End of 4.1 tests # End of 4.1 tests
#
# Bug#20404: SHOW CREATE TABLE fails with Turkish I
#
# U+0130 (LATIN CAPITAL LETTER I WITH DOT ABOVE) takes two bytes in utf8 but
# lower-cases to the one-byte letter "i", so the lower-cased table name is
# shorter than the name the client sent. The name is exercised both as a
# single letter and doubled.
#
set names utf8;
# The tables may not exist yet; hide the resulting warnings.
--disable_warnings
drop table if exists İ,İİ;
--enable_warnings
# Single-character name.
create table İ (s1 int);
show create table İ;
show tables;
drop table İ;
# Two-character name.
create table İİ (s1 int);
show create table İİ;
show tables;
drop table İİ;
# Restore the connection character set.
set names latin1;
--echo End of 5.0 tests
...@@ -6177,7 +6177,7 @@ TABLE_LIST *st_select_lex::add_table_to_list(THD *thd, ...@@ -6177,7 +6177,7 @@ TABLE_LIST *st_select_lex::add_table_to_list(THD *thd,
ptr->alias= alias_str; ptr->alias= alias_str;
if (lower_case_table_names && table->table.length) if (lower_case_table_names && table->table.length)
my_casedn_str(files_charset_info, table->table.str); table->table.length= my_casedn_str(files_charset_info, table->table.str);
ptr->table_name=table->table.str; ptr->table_name=table->table.str;
ptr->table_name_length=table->table.length; ptr->table_name_length=table->table.length;
ptr->lock_type= lock_type; ptr->lock_type= lock_type;
......
...@@ -211,9 +211,10 @@ static int my_strnncollsp_8bit_bin(CHARSET_INFO * cs __attribute__((unused)), ...@@ -211,9 +211,10 @@ static int my_strnncollsp_8bit_bin(CHARSET_INFO * cs __attribute__((unused)),
/* This function is used for all conversion functions */ /* This function is used for all conversion functions */
static void my_case_str_bin(CHARSET_INFO *cs __attribute__((unused)), static uint my_case_str_bin(CHARSET_INFO *cs __attribute__((unused)),
char *str __attribute__((unused))) char *str __attribute__((unused)))
{ {
return 0;
} }
static uint my_case_bin(CHARSET_INFO *cs __attribute__((unused)), static uint my_case_bin(CHARSET_INFO *cs __attribute__((unused)),
......
...@@ -21,40 +21,44 @@ ...@@ -21,40 +21,44 @@
#ifdef USE_MB #ifdef USE_MB
void my_caseup_str_mb(CHARSET_INFO * cs, char *str) uint my_caseup_str_mb(CHARSET_INFO * cs, char *str)
{ {
register uint32 l; register uint32 l;
register uchar *map=cs->to_upper; register uchar *map= cs->to_upper;
char *str_orig= str;
while (*str) while (*str)
{ {
/* Pointing after the '\0' is safe here. */ /* Pointing after the '\0' is safe here. */
if ((l=my_ismbchar(cs, str, str + cs->mbmaxlen))) if ((l= my_ismbchar(cs, str, str + cs->mbmaxlen)))
str+=l; str+= l;
else else
{ {
*str=(char) map[(uchar)*str]; *str= (char) map[(uchar)*str];
str++; str++;
} }
} }
return str - str_orig;
} }
void my_casedn_str_mb(CHARSET_INFO * cs, char *str) uint my_casedn_str_mb(CHARSET_INFO * cs, char *str)
{ {
register uint32 l; register uint32 l;
register uchar *map=cs->to_lower; register uchar *map= cs->to_lower;
char *str_orig= str;
while (*str) while (*str)
{ {
/* Pointing after the '\0' is safe here. */ /* Pointing after the '\0' is safe here. */
if ((l=my_ismbchar(cs, str, str + cs->mbmaxlen))) if ((l= my_ismbchar(cs, str, str + cs->mbmaxlen)))
str+=l; str+= l;
else else
{ {
*str=(char) map[(uchar)*str]; *str= (char) map[(uchar)*str];
str++; str++;
} }
} }
return str - str_orig;
} }
uint my_caseup_mb(CHARSET_INFO * cs, char *src, uint srclen, uint my_caseup_mb(CHARSET_INFO * cs, char *src, uint srclen,
......
...@@ -188,20 +188,26 @@ int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *a, uint a_length, ...@@ -188,20 +188,26 @@ int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *a, uint a_length,
} }
void my_caseup_str_8bit(CHARSET_INFO * cs,char *str) uint my_caseup_str_8bit(CHARSET_INFO * cs,char *str)
{ {
register uchar *map=cs->to_upper; register uchar *map= cs->to_upper;
while ((*str = (char) map[(uchar) *str]) != 0) char *str_orig= str;
while ((*str= (char) map[(uchar) *str]) != 0)
str++; str++;
return str - str_orig;
} }
void my_casedn_str_8bit(CHARSET_INFO * cs,char *str)
uint my_casedn_str_8bit(CHARSET_INFO * cs,char *str)
{ {
register uchar *map=cs->to_lower; register uchar *map= cs->to_lower;
while ((*str = (char) map[(uchar)*str]) != 0) char *str_orig= str;
while ((*str= (char) map[(uchar) *str]) != 0)
str++; str++;
return str - str_orig;
} }
uint my_caseup_8bit(CHARSET_INFO * cs, char *src, uint srclen, uint my_caseup_8bit(CHARSET_INFO * cs, char *src, uint srclen,
char *dst __attribute__((unused)), char *dst __attribute__((unused)),
uint dstlen __attribute__((unused))) uint dstlen __attribute__((unused)))
......
...@@ -159,13 +159,13 @@ static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, uint slen, ...@@ -159,13 +159,13 @@ static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, uint slen,
} }
static void my_caseup_str_ucs2(CHARSET_INFO * cs __attribute__((unused)), static uint my_caseup_str_ucs2(CHARSET_INFO * cs __attribute__((unused)),
char * s __attribute__((unused))) char * s __attribute__((unused)))
{ {
return 0;
} }
static uint my_casedn_ucs2(CHARSET_INFO *cs, char *src, uint srclen, static uint my_casedn_ucs2(CHARSET_INFO *cs, char *src, uint srclen,
char *dst __attribute__((unused)), char *dst __attribute__((unused)),
uint dstlen __attribute__((unused))) uint dstlen __attribute__((unused)))
...@@ -188,9 +188,11 @@ static uint my_casedn_ucs2(CHARSET_INFO *cs, char *src, uint srclen, ...@@ -188,9 +188,11 @@ static uint my_casedn_ucs2(CHARSET_INFO *cs, char *src, uint srclen,
return srclen; return srclen;
} }
static void my_casedn_str_ucs2(CHARSET_INFO *cs __attribute__((unused)),
static uint my_casedn_str_ucs2(CHARSET_INFO *cs __attribute__((unused)),
char * s __attribute__((unused))) char * s __attribute__((unused)))
{ {
return 0;
} }
......
...@@ -2045,6 +2045,52 @@ static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)), ...@@ -2045,6 +2045,52 @@ static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)),
return MY_CS_ILSEQ; return MY_CS_ILSEQ;
} }
/*
  Decode one utf8 sequence of up to three bytes into a wide character,
  without an end-of-buffer argument.

  SYNOPSIS
    my_utf8_uni_no_range()
    cs    Character set (unused)
    pwc   Receives the decoded code point on success
    s     Start of the sequence; there is no range check, so the caller
          must guarantee the bytes are readable (for example, a
          null-terminated string)

  NOTES
    A trailing byte is only read after every byte before it has been
    accepted. A '\0' byte is not a valid continuation byte, so decoding
    stops at the terminator instead of reading past it.

  RETURN
    1, 2 or 3     Number of bytes consumed
    MY_CS_ILSEQ   Invalid lead byte, invalid continuation byte,
                  a three-byte sequence starting with 0xe0 whose second
                  byte is below 0xa0, or a lead byte of 0xf0 and above
*/

static int my_utf8_uni_no_range(CHARSET_INFO *cs __attribute__((unused)),
                                my_wc_t * pwc, const uchar *s)
{
  const unsigned char lead= s[0];
  int trail1, trail2;

  /* Single byte: plain ASCII. */
  if (lead < 0x80)
  {
    *pwc= lead;
    return 1;
  }

  /* Bytes below 0xc2 cannot start a sequence; 0xf0 and above are rejected. */
  if (lead < 0xc2 || lead >= 0xf0)
    return MY_CS_ILSEQ;

  /* Both remaining forms need a valid first continuation byte. */
  trail1= s[1] ^ 0x80;
  if (!(trail1 < 0x40))
    return MY_CS_ILSEQ;

  if (lead < 0xe0)
  {
    *pwc= ((my_wc_t) (lead & 0x1f) << 6) | (my_wc_t) trail1;
    return 2;
  }

  /* Three-byte form: s[2] is read only now that s[1] has been accepted. */
  trail2= s[2] ^ 0x80;
  if (!(trail2 < 0x40))
    return MY_CS_ILSEQ;

  /* Lead byte 0xe0 requires the second byte to be at least 0xa0. */
  if (!(lead >= 0xe1 || s[1] >= 0xa0))
    return MY_CS_ILSEQ;

  *pwc= ((my_wc_t) (lead & 0x0f) << 12) |
        ((my_wc_t) trail1 << 6) |
        (my_wc_t) trail2;
  return 3;
}
static int my_uni_utf8 (CHARSET_INFO *cs __attribute__((unused)) , static int my_uni_utf8 (CHARSET_INFO *cs __attribute__((unused)) ,
my_wc_t wc, uchar *r, uchar *e) my_wc_t wc, uchar *r, uchar *e)
{ {
...@@ -2091,6 +2137,34 @@ static int my_uni_utf8 (CHARSET_INFO *cs __attribute__((unused)) , ...@@ -2091,6 +2137,34 @@ static int my_uni_utf8 (CHARSET_INFO *cs __attribute__((unused)) ,
} }
/*
  Encode one wide character as utf8 without an end-of-buffer argument.

  SYNOPSIS
    my_uni_utf8_no_range()
    cs    Character set (unused)
    wc    Code point to encode
    r     Destination; there is no range check, so the caller must
          guarantee room for up to three bytes

  NOTES
    Bytes are stored from the last one to the first.

  RETURN
    1, 2 or 3     Number of bytes written
    MY_CS_ILUNI   wc is 0x10000 or above; nothing is written
*/

static int my_uni_utf8_no_range(CHARSET_INFO *cs __attribute__((unused)),
                                my_wc_t wc, uchar *r)
{
  if (wc < 0x80)
  {
    r[0]= (uchar) wc;
    return 1;
  }

  if (wc < 0x800)
  {
    r[1]= (uchar) (0x80 | (wc & 0x3f));
    r[0]= (uchar) (0xc0 | (wc >> 6));
    return 2;
  }

  if (wc < 0x10000)
  {
    r[2]= (uchar) (0x80 | (wc & 0x3f));
    r[1]= (uchar) (0x80 | ((wc >> 6) & 0x3f));
    r[0]= (uchar) (0xe0 | (wc >> 12));
    return 3;
  }

  return MY_CS_ILUNI;
}
static uint my_caseup_utf8(CHARSET_INFO *cs, char *src, uint srclen, static uint my_caseup_utf8(CHARSET_INFO *cs, char *src, uint srclen,
char *dst, uint dstlen) char *dst, uint dstlen)
{ {
...@@ -2141,10 +2215,26 @@ static void my_hash_sort_utf8(CHARSET_INFO *cs, const uchar *s, uint slen, ...@@ -2141,10 +2215,26 @@ static void my_hash_sort_utf8(CHARSET_INFO *cs, const uchar *s, uint slen,
} }
static void my_caseup_str_utf8(CHARSET_INFO * cs, char * s) static uint my_caseup_str_utf8(CHARSET_INFO *cs, char *src)
{ {
uint len= (uint) strlen(s); my_wc_t wc;
my_caseup_utf8(cs, s, len, s, len); int srcres, dstres;
char *dst= src, *dst0= src;
MY_UNICASE_INFO **uni_plane= cs->caseinfo;
DBUG_ASSERT(cs->caseup_multiply == 1);
while (*src &&
(srcres= my_utf8_uni_no_range(cs, &wc, (uchar *) src)) > 0)
{
int plane= (wc>>8) & 0xFF;
wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].toupper : wc;
if ((dstres= my_uni_utf8_no_range(cs, wc, (uchar*) dst)) <= 0)
break;
src+= srcres;
dst+= dstres;
}
*dst= '\0';
return (uint) (dst - dst0);
} }
...@@ -2170,10 +2260,43 @@ static uint my_casedn_utf8(CHARSET_INFO *cs, char *src, uint srclen, ...@@ -2170,10 +2260,43 @@ static uint my_casedn_utf8(CHARSET_INFO *cs, char *src, uint srclen,
return (uint) (dst - dst0); return (uint) (dst - dst0);
} }
static void my_casedn_str_utf8(CHARSET_INFO *cs, char * s)
static uint my_casedn_str_utf8(CHARSET_INFO *cs, char *src)
{ {
uint len= (uint) strlen(s); my_wc_t wc;
my_casedn_utf8(cs, s, len, s, len); int srcres, dstres;
char *dst= src, *dst0= src;
MY_UNICASE_INFO **uni_plane= cs->caseinfo;
DBUG_ASSERT(cs->casedn_multiply == 1);
while (*src &&
(srcres= my_utf8_uni_no_range(cs, &wc, (uchar *) src)) > 0)
{
int plane= (wc>>8) & 0xFF;
wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].tolower : wc;
if ((dstres= my_uni_utf8_no_range(cs, wc, (uchar*) dst)) <= 0)
break;
src+= srcres;
dst+= dstres;
}
/*
In rare cases lower string can be shorter than
the original string, for example:
"U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE"
(which is 0xC4B0 in utf8, i.e. two bytes)
is converted into
"U+0069 LATIN SMALL LETTER I"
(which is 0x69 in utf8, i.e. one byte)
So, we need to put '\0' terminator after converting.
*/
*dst= '\0';
return (uint) (dst - dst0);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment