Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
MariaDB
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexedi
MariaDB
Commits
30b4bb42
Commit
30b4bb42
authored
Apr 18, 2023
by
Alexander Barkov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
MDEV-31068 Reuse duplicate case conversion code in ctype-utf8.c and ctype-ucs2.c
parent
2230c2e7
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
78 additions
and
118 deletions
+78
-118
strings/ctype-ucs2.c
strings/ctype-ucs2.c
+6
-60
strings/ctype-unidata.h
strings/ctype-unidata.h
+56
-0
strings/ctype-utf8.c
strings/ctype-utf8.c
+16
-58
No files found.
strings/ctype-ucs2.c
View file @
30b4bb42
...
...
@@ -1284,24 +1284,6 @@ my_uni_utf16(CHARSET_INFO *cs __attribute__((unused)),
const
char
charset_name_utf16le
[]
=
"utf16le"
;
#define charset_name_utf16le_length (sizeof(charset_name_utf16le)-1)
/*
  Replace *wc with the "tolower" member of its entry in the case table
  of uni_plane.

  *wc is left untouched when it is greater than uni_plane->maxchar, or
  when the table has no page (null pointer) for the code's upper bits.
*/
static inline void
my_tolower_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  my_wc_t code= *wc;
  MY_UNICASE_CHARACTER *case_page;

  if (code > uni_plane->maxchar)
    return;                                 /* Outside of the table range */

  case_page= uni_plane->page[code >> 8];    /* Page is selected by code / 256 */
  if (!case_page)
    return;                                 /* No case data for this page */

  *wc= case_page[code & 0xFF].tolower;      /* Low 8 bits index into the page */
}
/*
  Replace *wc with the "toupper" member of its entry in the case table
  of uni_plane.

  *wc is left untouched when it is greater than uni_plane->maxchar, or
  when the table has no page (null pointer) for the code's upper bits.
*/
static inline void
my_toupper_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  my_wc_t code= *wc;
  MY_UNICASE_CHARACTER *case_page;

  if (code > uni_plane->maxchar)
    return;                                 /* Outside of the table range */

  case_page= uni_plane->page[code >> 8];    /* Page is selected by code / 256 */
  if (!case_page)
    return;                                 /* No case data for this page */

  *wc= case_page[code & 0xFF].toupper;      /* Low 8 bits index into the page */
}
static
inline
void
my_tosort_utf16
(
MY_UNICASE_INFO
*
uni_plane
,
my_wc_t
*
wc
)
{
...
...
@@ -1335,7 +1317,7 @@ my_caseup_utf16(CHARSET_INFO *cs, const char *src, size_t srclen,
while
((
src
<
srcend
)
&&
(
res
=
mb_wc
(
cs
,
&
wc
,
(
uchar
*
)
src
,
(
uchar
*
)
srcend
))
>
0
)
{
my_toupper_u
tf16
(
uni_plane
,
&
wc
);
my_toupper_u
nicode
(
uni_plane
,
&
wc
);
if
(
res
!=
wc_mb
(
cs
,
wc
,
(
uchar
*
)
dst
,
(
uchar
*
)
dstend
))
break
;
src
+=
res
;
...
...
@@ -1393,7 +1375,7 @@ my_casedn_utf16(CHARSET_INFO *cs, const char *src, size_t srclen,
while
((
src
<
srcend
)
&&
(
res
=
mb_wc
(
cs
,
&
wc
,
(
uchar
*
)
src
,
(
uchar
*
)
srcend
))
>
0
)
{
my_tolower_u
tf16
(
uni_plane
,
&
wc
);
my_tolower_u
nicode
(
uni_plane
,
&
wc
);
if
(
res
!=
wc_mb
(
cs
,
wc
,
(
uchar
*
)
dst
,
(
uchar
*
)
dstend
))
break
;
src
+=
res
;
...
...
@@ -2196,24 +2178,6 @@ my_uni_utf32(CHARSET_INFO *cs __attribute__((unused)),
}
/*
  In-place conversion of *wc through the "tolower" column of the case
  table referenced by uni_plane.

  Nothing is written when *wc exceeds uni_plane->maxchar or when the
  page pointer for (*wc >> 8) is null.
*/
static inline void
my_tolower_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  MY_UNICASE_CHARACTER *entries;
  my_wc_t cp= *wc;

  if (cp <= uni_plane->maxchar)
  {
    entries= uni_plane->page[cp >> 8];
    if (entries)
      *wc= entries[cp & 0xFF].tolower;
  }
}
/*
  In-place conversion of *wc through the "toupper" column of the case
  table referenced by uni_plane.

  Nothing is written when *wc exceeds uni_plane->maxchar or when the
  page pointer for (*wc >> 8) is null.
*/
static inline void
my_toupper_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  MY_UNICASE_CHARACTER *entries;
  my_wc_t cp= *wc;

  if (cp <= uni_plane->maxchar)
  {
    entries= uni_plane->page[cp >> 8];
    if (entries)
      *wc= entries[cp & 0xFF].toupper;
  }
}
static
inline
void
my_tosort_utf32
(
MY_UNICASE_INFO
*
uni_plane
,
my_wc_t
*
wc
)
{
...
...
@@ -2256,7 +2220,7 @@ my_caseup_utf32(CHARSET_INFO *cs, const char *src, size_t srclen,
while
((
src
<
srcend
)
&&
(
res
=
my_utf32_uni
(
cs
,
&
wc
,
(
uchar
*
)
src
,
(
uchar
*
)
srcend
))
>
0
)
{
my_toupper_u
tf32
(
uni_plane
,
&
wc
);
my_toupper_u
nicode
(
uni_plane
,
&
wc
);
if
(
res
!=
my_uni_utf32
(
cs
,
wc
,
(
uchar
*
)
dst
,
(
uchar
*
)
dstend
))
break
;
src
+=
res
;
...
...
@@ -2312,7 +2276,7 @@ my_casedn_utf32(CHARSET_INFO *cs, const char *src, size_t srclen,
while
((
res
=
my_utf32_uni
(
cs
,
&
wc
,
(
uchar
*
)
src
,
(
uchar
*
)
srcend
))
>
0
)
{
my_tolower_u
tf32
(
uni_plane
,
&
wc
);
my_tolower_u
nicode
(
uni_plane
,
&
wc
);
if
(
res
!=
my_uni_utf32
(
cs
,
wc
,
(
uchar
*
)
dst
,
(
uchar
*
)
dstend
))
break
;
src
+=
res
;
...
...
@@ -3118,24 +3082,6 @@ static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
}
/*
  Replace *wc with the "tolower" member of its case table entry.

  The page index is masked to 8 bits, so only bits 8..15 of *wc select
  the page; there is no comparison against uni_plane->maxchar here.
  *wc stays unchanged when the selected page pointer is null.
*/
static inline void
my_tolower_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  my_wc_t code= *wc;
  MY_UNICASE_CHARACTER *case_page= uni_plane->page[(code >> 8) & 0xFF];

  if (!case_page)
    return;
  *wc= case_page[code & 0xFF].tolower;
}
/*
  Replace *wc with the "toupper" member of its case table entry.

  The page index is masked to 8 bits, so only bits 8..15 of *wc select
  the page; there is no comparison against uni_plane->maxchar here.
  *wc stays unchanged when the selected page pointer is null.
*/
static inline void
my_toupper_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  my_wc_t code= *wc;
  MY_UNICASE_CHARACTER *case_page= uni_plane->page[(code >> 8) & 0xFF];

  if (!case_page)
    return;
  *wc= case_page[code & 0xFF].toupper;
}
static
inline
void
my_tosort_ucs2
(
MY_UNICASE_INFO
*
uni_plane
,
my_wc_t
*
wc
)
{
...
...
@@ -3157,7 +3103,7 @@ static size_t my_caseup_ucs2(CHARSET_INFO *cs, const char *src, size_t srclen,
while
((
src
<
srcend
)
&&
(
res
=
my_ucs2_uni
(
cs
,
&
wc
,
(
uchar
*
)
src
,
(
uchar
*
)
srcend
))
>
0
)
{
my_toupper_u
cs2
(
uni_plane
,
&
wc
);
my_toupper_u
nicode_bmp
(
uni_plane
,
&
wc
);
if
(
res
!=
my_uni_ucs2
(
cs
,
wc
,
(
uchar
*
)
dst
,
(
uchar
*
)
dstend
))
break
;
src
+=
res
;
...
...
@@ -3208,7 +3154,7 @@ static size_t my_casedn_ucs2(CHARSET_INFO *cs, const char *src, size_t srclen,
while
((
src
<
srcend
)
&&
(
res
=
my_ucs2_uni
(
cs
,
&
wc
,
(
uchar
*
)
src
,
(
uchar
*
)
srcend
))
>
0
)
{
my_tolower_u
cs2
(
uni_plane
,
&
wc
);
my_tolower_u
nicode_bmp
(
uni_plane
,
&
wc
);
if
(
res
!=
my_uni_ucs2
(
cs
,
wc
,
(
uchar
*
)
dst
,
(
uchar
*
)
dstend
))
break
;
src
+=
res
;
...
...
strings/ctype-unidata.h
View file @
30b4bb42
...
...
@@ -24,6 +24,62 @@ extern MY_UNICASE_CHARACTER *my_unicase_default_pages[256];
extern
MY_UNICASE_CHARACTER
my_unicase_mysql500_page00
[
256
];
extern
MY_UNICASE_CHARACTER
*
my_unicase_mysql500_pages
[
256
];
/*
  Return the "tolower" member of entry ch in my_unicase_default_page00.

  NOTE(review): the "_7bit" suffix suggests callers pass bytes below 128;
  this function itself does not check the range - confirm at call sites.
*/
static inline my_wc_t
my_u300_tolower_7bit(uchar ch)
{
  const MY_UNICASE_CHARACTER *entry= &my_unicase_default_page00[ch];
  return entry->tolower;
}
/*
  Return the "toupper" member of entry ch in my_unicase_default_page00.

  NOTE(review): the "_7bit" suffix suggests callers pass bytes below 128;
  this function itself does not check the range - confirm at call sites.
*/
static inline my_wc_t
my_u300_toupper_7bit(uchar ch)
{
  const MY_UNICASE_CHARACTER *entry= &my_unicase_default_page00[ch];
  return entry->toupper;
}
/*
  Replace *wc with the "tolower" member of its entry in the case table
  of uni_plane.

  The caller must guarantee *wc <= uni_plane->maxchar: this is only
  checked by DBUG_ASSERT, there is no run-time range test in this function.
  *wc stays unchanged when the page pointer for (*wc >> 8) is null.
*/
static inline void
my_tolower_unicode_bmp(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  my_wc_t code= *wc;
  const MY_UNICASE_CHARACTER *case_page;

  DBUG_ASSERT(code <= uni_plane->maxchar);

  case_page= uni_plane->page[code >> 8];
  if (!case_page)
    return;
  *wc= case_page[code & 0xFF].tolower;
}
/*
  Replace *wc with the "toupper" member of its entry in the case table
  of uni_plane.

  The caller must guarantee *wc <= uni_plane->maxchar: this is only
  checked by DBUG_ASSERT, there is no run-time range test in this function.
  *wc stays unchanged when the page pointer for (*wc >> 8) is null.
*/
static inline void
my_toupper_unicode_bmp(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  my_wc_t code= *wc;
  const MY_UNICASE_CHARACTER *case_page;

  DBUG_ASSERT(code <= uni_plane->maxchar);

  case_page= uni_plane->page[code >> 8];
  if (!case_page)
    return;
  *wc= case_page[code & 0xFF].toupper;
}
/*
  Replace *wc with the "tolower" member of its entry in the case table
  of uni_plane.

  Unlike an assertion-only check, the range test here is done at run time:
  a *wc greater than uni_plane->maxchar is returned unchanged. *wc is also
  unchanged when the page pointer for (*wc >> 8) is null.
*/
static inline void
my_tolower_unicode(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  my_wc_t code= *wc;
  const MY_UNICASE_CHARACTER *case_page;

  if (code > uni_plane->maxchar)
    return;

  case_page= uni_plane->page[code >> 8];
  if (!case_page)
    return;

  *wc= case_page[code & 0xFF].tolower;
}
/*
  Replace *wc with the "toupper" member of its entry in the case table
  of uni_plane.

  Unlike an assertion-only check, the range test here is done at run time:
  a *wc greater than uni_plane->maxchar is returned unchanged. *wc is also
  unchanged when the page pointer for (*wc >> 8) is null.
*/
static inline void
my_toupper_unicode(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  my_wc_t code= *wc;
  const MY_UNICASE_CHARACTER *case_page;

  if (code > uni_plane->maxchar)
    return;

  case_page= uni_plane->page[code >> 8];
  if (!case_page)
    return;

  *wc= case_page[code & 0xFF].toupper;
}
size_t
my_strxfrm_pad_nweights_unicode
(
uchar
*
str
,
uchar
*
strend
,
size_t
nweights
);
size_t
my_strxfrm_pad_unicode
(
uchar
*
str
,
uchar
*
strend
);
...
...
strings/ctype-utf8.c
View file @
30b4bb42
...
...
@@ -5207,24 +5207,6 @@ static int my_uni_utf8mb3_no_range(CHARSET_INFO *cs __attribute__((unused)),
}
/*
  In-place conversion of *wc through the "tolower" column of the case
  table referenced by uni_plane.

  The page number is taken from bits 8..15 of *wc (masked with 0xFF);
  uni_plane->maxchar is not consulted. Nothing is written when the
  selected page pointer is null.
*/
static inline void
my_tolower_utf8mb3(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  my_wc_t cp= *wc;
  MY_UNICASE_CHARACTER *entries= uni_plane->page[(cp >> 8) & 0xFF];

  if (entries)
    *wc= entries[cp & 0xFF].tolower;
}
/*
  In-place conversion of *wc through the "toupper" column of the case
  table referenced by uni_plane.

  The page number is taken from bits 8..15 of *wc (masked with 0xFF);
  uni_plane->maxchar is not consulted. Nothing is written when the
  selected page pointer is null.
*/
static inline void
my_toupper_utf8mb3(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  my_wc_t cp= *wc;
  MY_UNICASE_CHARACTER *entries= uni_plane->page[(cp >> 8) & 0xFF];

  if (entries)
    *wc= entries[cp & 0xFF].toupper;
}
static
size_t
my_caseup_utf8mb3
(
CHARSET_INFO
*
cs
,
const
char
*
src
,
size_t
srclen
,
char
*
dst
,
size_t
dstlen
)
...
...
@@ -5239,7 +5221,7 @@ static size_t my_caseup_utf8mb3(CHARSET_INFO *cs,
while
((
src
<
srcend
)
&&
(
srcres
=
my_utf8mb3_uni
(
cs
,
&
wc
,
(
uchar
*
)
src
,
(
uchar
*
)
srcend
))
>
0
)
{
my_toupper_u
tf8mb3
(
uni_plane
,
&
wc
);
my_toupper_u
nicode_bmp
(
uni_plane
,
&
wc
);
if
((
dstres
=
my_uni_utf8mb3
(
cs
,
wc
,
(
uchar
*
)
dst
,
(
uchar
*
)
dstend
))
<=
0
)
break
;
src
+=
srcres
;
...
...
@@ -5292,7 +5274,7 @@ static size_t my_caseup_str_utf8mb3(CHARSET_INFO *cs, char *src)
while
(
*
src
&&
(
srcres
=
my_utf8mb3_uni_no_range
(
cs
,
&
wc
,
(
uchar
*
)
src
))
>
0
)
{
my_toupper_u
tf8mb3
(
uni_plane
,
&
wc
);
my_toupper_u
nicode_bmp
(
uni_plane
,
&
wc
);
if
((
dstres
=
my_uni_utf8mb3_no_range
(
cs
,
wc
,
(
uchar
*
)
dst
))
<=
0
)
break
;
src
+=
srcres
;
...
...
@@ -5317,7 +5299,7 @@ static size_t my_casedn_utf8mb3(CHARSET_INFO *cs,
while
((
src
<
srcend
)
&&
(
srcres
=
my_utf8mb3_uni
(
cs
,
&
wc
,
(
uchar
*
)
src
,
(
uchar
*
)
srcend
))
>
0
)
{
my_tolower_u
tf8mb3
(
uni_plane
,
&
wc
);
my_tolower_u
nicode_bmp
(
uni_plane
,
&
wc
);
if
((
dstres
=
my_uni_utf8mb3
(
cs
,
wc
,
(
uchar
*
)
dst
,
(
uchar
*
)
dstend
))
<=
0
)
break
;
src
+=
srcres
;
...
...
@@ -5338,7 +5320,7 @@ static size_t my_casedn_str_utf8mb3(CHARSET_INFO *cs, char *src)
while
(
*
src
&&
(
srcres
=
my_utf8mb3_uni_no_range
(
cs
,
&
wc
,
(
uchar
*
)
src
))
>
0
)
{
my_tolower_u
tf8mb3
(
uni_plane
,
&
wc
);
my_tolower_u
nicode_bmp
(
uni_plane
,
&
wc
);
if
((
dstres
=
my_uni_utf8mb3_no_range
(
cs
,
wc
,
(
uchar
*
)
dst
))
<=
0
)
break
;
src
+=
srcres
;
...
...
@@ -5397,7 +5379,7 @@ int my_strcasecmp_utf8mb3(CHARSET_INFO *cs, const char *s, const char *t)
It represents a single byte character.
Convert it into weight according to collation.
*/
s_wc
=
my_u
nicase_default_page00
[(
uchar
)
s
[
0
]].
tolower
;
s_wc
=
my_u
300_tolower_7bit
((
uchar
)
s
[
0
])
;
s
++
;
}
else
...
...
@@ -5430,7 +5412,7 @@ int my_strcasecmp_utf8mb3(CHARSET_INFO *cs, const char *s, const char *t)
s
+=
res
;
/* Convert Unicode code into weight according to collation */
my_tolower_u
tf8mb3
(
uni_plane
,
&
s_wc
);
my_tolower_u
nicode_bmp
(
uni_plane
,
&
s_wc
);
}
...
...
@@ -5439,7 +5421,7 @@ int my_strcasecmp_utf8mb3(CHARSET_INFO *cs, const char *s, const char *t)
if
((
uchar
)
t
[
0
]
<
128
)
{
/* Convert single byte character into weight */
t_wc
=
my_u
nicase_default_page00
[(
uchar
)
t
[
0
]].
tolower
;
t_wc
=
my_u
300_tolower_7bit
((
uchar
)
t
[
0
])
;
t
++
;
}
else
...
...
@@ -5450,7 +5432,7 @@ int my_strcasecmp_utf8mb3(CHARSET_INFO *cs, const char *s, const char *t)
t
+=
res
;
/* Convert code into weight */
my_tolower_u
tf8mb3
(
uni_plane
,
&
t_wc
);
my_tolower_u
nicode_bmp
(
uni_plane
,
&
t_wc
);
}
/* Now we have two weights, let's compare them */
...
...
@@ -7678,30 +7660,6 @@ my_wc_mb_utf8mb4_no_range(CHARSET_INFO *cs __attribute__((unused)),
}
/*
  In-place conversion of *wc through the "tolower" column of the case
  table referenced by uni_plane.

  A *wc above uni_plane->maxchar is returned unchanged, as is one whose
  page pointer (indexed by *wc >> 8) is null.
*/
static inline void
my_tolower_utf8mb4(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  my_wc_t cp= *wc;
  MY_UNICASE_CHARACTER *entries;

  if (cp > uni_plane->maxchar)
    return;

  entries= uni_plane->page[cp >> 8];
  if (entries)
    *wc= entries[cp & 0xFF].tolower;
}
/*
  In-place conversion of *wc through the "toupper" column of the case
  table referenced by uni_plane.

  A *wc above uni_plane->maxchar is returned unchanged, as is one whose
  page pointer (indexed by *wc >> 8) is null.
*/
static inline void
my_toupper_utf8mb4(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  my_wc_t cp= *wc;
  MY_UNICASE_CHARACTER *entries;

  if (cp > uni_plane->maxchar)
    return;

  entries= uni_plane->page[cp >> 8];
  if (entries)
    *wc= entries[cp & 0xFF].toupper;
}
static
size_t
my_caseup_utf8mb4
(
CHARSET_INFO
*
cs
,
const
char
*
src
,
size_t
srclen
,
char
*
dst
,
size_t
dstlen
)
...
...
@@ -7717,7 +7675,7 @@ my_caseup_utf8mb4(CHARSET_INFO *cs, const char *src, size_t srclen,
(
srcres
=
my_mb_wc_utf8mb4
(
cs
,
&
wc
,
(
uchar
*
)
src
,
(
uchar
*
)
srcend
))
>
0
)
{
my_toupper_u
tf8mb4
(
uni_plane
,
&
wc
);
my_toupper_u
nicode
(
uni_plane
,
&
wc
);
if
((
dstres
=
my_wc_mb_utf8mb4
(
cs
,
wc
,
(
uchar
*
)
dst
,
(
uchar
*
)
dstend
))
<=
0
)
break
;
src
+=
srcres
;
...
...
@@ -7784,7 +7742,7 @@ my_caseup_str_utf8mb4(CHARSET_INFO *cs, char *src)
while
(
*
src
&&
(
srcres
=
my_mb_wc_utf8mb4_no_range
(
cs
,
&
wc
,
(
uchar
*
)
src
))
>
0
)
{
my_toupper_u
tf8mb4
(
uni_plane
,
&
wc
);
my_toupper_u
nicode
(
uni_plane
,
&
wc
);
if
((
dstres
=
my_wc_mb_utf8mb4_no_range
(
cs
,
wc
,
(
uchar
*
)
dst
))
<=
0
)
break
;
src
+=
srcres
;
...
...
@@ -7811,7 +7769,7 @@ my_casedn_utf8mb4(CHARSET_INFO *cs,
(
srcres
=
my_mb_wc_utf8mb4
(
cs
,
&
wc
,
(
uchar
*
)
src
,
(
uchar
*
)
srcend
))
>
0
)
{
my_tolower_u
tf8mb4
(
uni_plane
,
&
wc
);
my_tolower_u
nicode
(
uni_plane
,
&
wc
);
if
((
dstres
=
my_wc_mb_utf8mb4
(
cs
,
wc
,
(
uchar
*
)
dst
,
(
uchar
*
)
dstend
))
<=
0
)
break
;
src
+=
srcres
;
...
...
@@ -7833,7 +7791,7 @@ my_casedn_str_utf8mb4(CHARSET_INFO *cs, char *src)
while
(
*
src
&&
(
srcres
=
my_mb_wc_utf8mb4_no_range
(
cs
,
&
wc
,
(
uchar
*
)
src
))
>
0
)
{
my_tolower_u
tf8mb4
(
uni_plane
,
&
wc
);
my_tolower_u
nicode
(
uni_plane
,
&
wc
);
if
((
dstres
=
my_wc_mb_utf8mb4_no_range
(
cs
,
wc
,
(
uchar
*
)
dst
))
<=
0
)
break
;
src
+=
srcres
;
...
...
@@ -7888,7 +7846,7 @@ my_strcasecmp_utf8mb4(CHARSET_INFO *cs, const char *s, const char *t)
It represents a single byte character.
Convert it into weight according to collation.
*/
s_wc
=
my_u
nicase_default_page00
[(
uchar
)
s
[
0
]].
tolower
;
s_wc
=
my_u
300_tolower_7bit
((
uchar
)
s
[
0
])
;
s
++
;
}
else
...
...
@@ -7903,7 +7861,7 @@ my_strcasecmp_utf8mb4(CHARSET_INFO *cs, const char *s, const char *t)
return
strcmp
(
s
,
t
);
s
+=
res
;
my_tolower_u
tf8mb4
(
uni_plane
,
&
s_wc
);
my_tolower_u
nicode
(
uni_plane
,
&
s_wc
);
}
...
...
@@ -7912,7 +7870,7 @@ my_strcasecmp_utf8mb4(CHARSET_INFO *cs, const char *s, const char *t)
if
((
uchar
)
t
[
0
]
<
128
)
{
/* Convert single byte character into weight */
t_wc
=
my_u
nicase_default_page00
[(
uchar
)
t
[
0
]].
tolower
;
t_wc
=
my_u
300_tolower_7bit
((
uchar
)
t
[
0
])
;
t
++
;
}
else
...
...
@@ -7922,7 +7880,7 @@ my_strcasecmp_utf8mb4(CHARSET_INFO *cs, const char *s, const char *t)
return
strcmp
(
s
,
t
);
t
+=
res
;
my_tolower_u
tf8mb4
(
uni_plane
,
&
t_wc
);
my_tolower_u
nicode
(
uni_plane
,
&
t_wc
);
}
/* Now we have two weights, let's compare them */
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment