Commit e9fde8f5 authored by bar@mysql.com

Allow cp932 characters to be stored in a SJIS column

parent eda3eb37
...@@ -41,3 +41,9 @@ C0C1C2C3C4C5C6C7C8C9CACBCCCDCECF ...@@ -41,3 +41,9 @@ C0C1C2C3C4C5C6C7C8C9CACBCCCDCECF
select hex(CONVERT(@utf84 USING sjis)); select hex(CONVERT(@utf84 USING sjis));
hex(CONVERT(@utf84 USING sjis)) hex(CONVERT(@utf84 USING sjis))
D0D1D2D3D4D5D6D7D8D9DADBDCDDDEDF D0D1D2D3D4D5D6D7D8D9DADBDCDDDEDF
create table t1 (a char(10) character set sjis);
insert into t1 values (0x878A);
select hex(a) from t1;
hex(a)
878A
drop table t1;
...@@ -32,3 +32,12 @@ select hex(CONVERT(@utf81 USING sjis)); ...@@ -32,3 +32,12 @@ select hex(CONVERT(@utf81 USING sjis));
select hex(CONVERT(@utf82 USING sjis)); select hex(CONVERT(@utf82 USING sjis));
select hex(CONVERT(@utf83 USING sjis)); select hex(CONVERT(@utf83 USING sjis));
select hex(CONVERT(@utf84 USING sjis)); select hex(CONVERT(@utf84 USING sjis));
#
# Allow inserting extra CP932 characters
# into a SJIS column
#
create table t1 (a char(10) character set sjis);
insert into t1 values (0x878A);
select hex(a) from t1;
drop table t1;
...@@ -4563,6 +4563,40 @@ uint my_numcells_sjis(CHARSET_INFO *cs __attribute__((unused)), ...@@ -4563,6 +4563,40 @@ uint my_numcells_sjis(CHARSET_INFO *cs __attribute__((unused)),
return clen; return clen;
} }
/*
  Returns the length in bytes of the longest well-formed SJIS prefix
  of the string [b, e), examining at most "pos" characters.
  CP932 additional characters are also accepted.

  SYNOPSIS
    cs   - character set handler (unused)
    b    - pointer to the first byte of the string
    e    - pointer one past the last byte of the string
    pos  - maximum number of characters to scan

  RETURN
    Number of bytes from "b" up to the first badly formed sequence,
    up to the end of the "pos"-th character, or up to "e",
    whichever comes first.
*/
static
uint my_well_formed_len_sjis(CHARSET_INFO *cs __attribute__((unused)),
                             const char *b, const char *e, uint pos)
{
  const char *b0= b;

  /* "pos" counts characters, not bytes: consume one per iteration. */
  for ( ; pos && b < e; pos--)
  {
    /*
      Work on an unsigned copy of the byte: plain "char" may be
      either signed or unsigned depending on the platform.
    */
    uchar c= (uchar) b[0];

    if (c < 0x80)
    {
      /* Single byte (7-bit) character */
      b+= 1;
    }
    else if (issjishead(c) && (e - b) > 1 && issjistail((uchar) b[1]))
    {
      /* Double byte character: lead byte followed by a valid trail byte */
      b+= 2;
    }
    else if (c >= 0xA1 && c <= 0xDF)
    {
      /* Single byte half-width katakana */
      b+= 1;
    }
    else
    {
      /* Wrong byte sequence, or a lead byte truncated by end of string */
      break;
    }
  }
  return (uint) (b - b0);
}
static MY_COLLATION_HANDLER my_collation_ci_handler = static MY_COLLATION_HANDLER my_collation_ci_handler =
...@@ -4586,7 +4620,7 @@ static MY_CHARSET_HANDLER my_charset_handler= ...@@ -4586,7 +4620,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
mbcharlen_sjis, mbcharlen_sjis,
my_numchars_mb, my_numchars_mb,
my_charpos_mb, my_charpos_mb,
my_well_formed_len_mb, my_well_formed_len_sjis,
my_lengthsp_8bit, my_lengthsp_8bit,
my_numcells_sjis, my_numcells_sjis,
my_mb_wc_sjis, /* mb_wc */ my_mb_wc_sjis, /* mb_wc */
......
...@@ -2127,7 +2127,12 @@ int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t) ...@@ -2127,7 +2127,12 @@ int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t)
{ {
my_wc_t s_wc,t_wc; my_wc_t s_wc,t_wc;
if (s[0] >= 0) /*
Cast to int8 for extra safety.
char can be unsigned by default
on some platforms.
*/
if (((int8)s[0]) >= 0)
{ {
/* /*
s[0] is between 0 and 127. s[0] is between 0 and 127.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment