Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
MariaDB
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexedi
MariaDB
Commits
63e1d22f
Commit
63e1d22f
authored
Mar 26, 2004
by
unknown
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
UTF8 now process space as PAD character correctly.
parent
403948cb
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
170 additions
and
8 deletions
+170
-8
mysql-test/r/compare.result
mysql-test/r/compare.result
+24
-0
mysql-test/r/ctype_utf8.result
mysql-test/r/ctype_utf8.result
+24
-0
mysql-test/t/compare.test
mysql-test/t/compare.test
+17
-0
mysql-test/t/ctype_utf8.test
mysql-test/t/ctype_utf8.test
+17
-0
strings/ctype-utf8.c
strings/ctype-utf8.c
+88
-8
No files found.
mysql-test/r/compare.result
View file @
63e1d22f
...
@@ -12,3 +12,27 @@ select * from t1;
...
@@ -12,3 +12,27 @@ select * from t1;
id
id
000000000001
000000000001
drop table t1;
drop table t1;
SELECT 'a' = 'a ';
'a' = 'a '
1
SELECT 'a\0' < 'a';
'a\0' < 'a'
1
SELECT 'a\0' < 'a ';
'a\0' < 'a '
1
SELECT 'a\t' < 'a';
'a\t' < 'a'
1
SELECT 'a\t' < 'a ';
'a\t' < 'a '
1
CREATE TABLE t1 (a char(10) not null);
INSERT INTO t1 VALUES ('a'),('a\0'),('a\t'),('a ');
SELECT hex(a),STRCMP(a,'a'), STRCMP(a,'a ') FROM t1;
hex(a) STRCMP(a,'a') STRCMP(a,'a ')
61 0 0
6100 -1 -1
6109 -1 -1
61 0 0
DROP TABLE t1;
mysql-test/r/ctype_utf8.result
View file @
63e1d22f
...
@@ -63,6 +63,30 @@ select 'A' like 'a' collate utf8_bin;
...
@@ -63,6 +63,30 @@ select 'A' like 'a' collate utf8_bin;
select _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%');
select _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%');
_utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%')
_utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%')
1
1
SELECT 'a' = 'a ';
'a' = 'a '
1
SELECT 'a\0' < 'a';
'a\0' < 'a'
1
SELECT 'a\0' < 'a ';
'a\0' < 'a '
1
SELECT 'a\t' < 'a';
'a\t' < 'a'
1
SELECT 'a\t' < 'a ';
'a\t' < 'a '
1
CREATE TABLE t1 (a char(10) character set utf8 not null);
INSERT INTO t1 VALUES ('a'),('a\0'),('a\t'),('a ');
SELECT hex(a),STRCMP(a,'a'), STRCMP(a,'a ') FROM t1;
hex(a) STRCMP(a,'a') STRCMP(a,'a ')
61 0 0
6100 -1 -1
6109 -1 -1
61 0 0
DROP TABLE t1;
select insert('txs',2,1,'hi'),insert('is ',4,0,'a'),insert('txxxxt',2,4,'es');
select insert('txs',2,1,'hi'),insert('is ',4,0,'a'),insert('txxxxt',2,4,'es');
insert('txs',2,1,'hi') insert('is ',4,0,'a') insert('txxxxt',2,4,'es')
insert('txs',2,1,'hi') insert('is ',4,0,'a') insert('txxxxt',2,4,'es')
this is a test
this is a test
...
...
mysql-test/t/compare.test
View file @
63e1d22f
...
@@ -13,3 +13,20 @@ select * from t1 where id=000000000001;
...
@@ -13,3 +13,20 @@ select * from t1 where id=000000000001;
delete
from
t1
where
id
=
000000000002
;
delete
from
t1
where
id
=
000000000002
;
select
*
from
t1
;
select
*
from
t1
;
drop
table
t1
;
drop
table
t1
;
#
# Check the following:
# "a" == "a "
# "a\0" < "a"
# "a\0" < "a "
SELECT
'a'
=
'a '
;
SELECT
'a\0'
<
'a'
;
SELECT
'a\0'
<
'a '
;
SELECT
'a\t'
<
'a'
;
SELECT
'a\t'
<
'a '
;
CREATE
TABLE
t1
(
a
char
(
10
)
not
null
);
INSERT
INTO
t1
VALUES
(
'a'
),(
'a\0'
),(
'a\t'
),(
'a '
);
SELECT
hex
(
a
),
STRCMP
(
a
,
'a'
),
STRCMP
(
a
,
'a '
)
FROM
t1
;
DROP
TABLE
t1
;
mysql-test/t/ctype_utf8.test
View file @
63e1d22f
...
@@ -33,6 +33,23 @@ select 'A' like 'a';
...
@@ -33,6 +33,23 @@ select 'A' like 'a';
select
'A'
like
'a'
collate
utf8_bin
;
select
'A'
like
'a'
collate
utf8_bin
;
select
_utf8
0xD0B0D0B1D0B2
like
concat
(
_utf8
'%'
,
_utf8
0xD0B1
,
_utf8
'%'
);
select
_utf8
0xD0B0D0B1D0B2
like
concat
(
_utf8
'%'
,
_utf8
0xD0B1
,
_utf8
'%'
);
#
# Check the following:
# "a" == "a "
# "a\0" < "a"
# "a\0" < "a "
SELECT
'a'
=
'a '
;
SELECT
'a\0'
<
'a'
;
SELECT
'a\0'
<
'a '
;
SELECT
'a\t'
<
'a'
;
SELECT
'a\t'
<
'a '
;
CREATE
TABLE
t1
(
a
char
(
10
)
character
set
utf8
not
null
);
INSERT
INTO
t1
VALUES
(
'a'
),(
'a\0'
),(
'a\t'
),(
'a '
);
SELECT
hex
(
a
),
STRCMP
(
a
,
'a'
),
STRCMP
(
a
,
'a '
)
FROM
t1
;
DROP
TABLE
t1
;
#
#
# Fix this, it should return 1:
# Fix this, it should return 1:
#
#
...
...
strings/ctype-utf8.c
View file @
63e1d22f
...
@@ -1837,18 +1837,98 @@ static int my_strnncoll_utf8(CHARSET_INFO *cs,
...
@@ -1837,18 +1837,98 @@ static int my_strnncoll_utf8(CHARSET_INFO *cs,
}
}
/*
/*
TODO: Has to be fixed as strnncollsp in ctype-simple
Compare strings, discarding end space
SYNOPSIS
my_strnncollsp_utf8()
cs character set handler
a First string to compare
a_length Length of 'a'
b Second string to compare
b_length Length of 'b'
IMPLEMENTATION
If one string is shorter than the other, the shorter one is treated as if
it were extended with spaces so that the strings have equal length.
This will ensure that the following things hold:
"a" == "a "
"a\0" < "a"
"a\0" < "a "
RETURN
< 0 a < b
= 0 a == b
> 0 a > b
*/
*/
static
static
int
my_strnncollsp_utf8
(
CHARSET_INFO
*
cs
,
int
my_strnncollsp_utf8
(
CHARSET_INFO
*
cs
,
const
uchar
*
s
,
uint
slen
,
const
uchar
*
s
,
uint
slen
,
const
uchar
*
t
,
uint
tlen
)
const
uchar
*
t
,
uint
tlen
)
{
{
for
(
;
slen
&&
s
[
slen
-
1
]
==
' '
;
slen
--
);
int
s_res
,
t_res
;
for
(
;
tlen
&&
t
[
tlen
-
1
]
==
' '
;
tlen
--
);
my_wc_t
s_wc
,
t_wc
;
return
my_strnncoll_utf8
(
cs
,
s
,
slen
,
t
,
tlen
);
const
uchar
*
se
=
s
+
slen
;
const
uchar
*
te
=
t
+
tlen
;
while
(
s
<
se
&&
t
<
te
)
{
int
plane
;
s_res
=
my_utf8_uni
(
cs
,
&
s_wc
,
s
,
se
);
t_res
=
my_utf8_uni
(
cs
,
&
t_wc
,
t
,
te
);
if
(
s_res
<=
0
||
t_res
<=
0
)
{
/* Incorrect string, compare by char value */
return
((
int
)
s
[
0
]
-
(
int
)
t
[
0
]);
}
plane
=
(
s_wc
>>
8
)
&
0xFF
;
s_wc
=
uni_plane
[
plane
]
?
uni_plane
[
plane
][
s_wc
&
0xFF
].
sort
:
s_wc
;
plane
=
(
t_wc
>>
8
)
&
0xFF
;
t_wc
=
uni_plane
[
plane
]
?
uni_plane
[
plane
][
t_wc
&
0xFF
].
sort
:
t_wc
;
if
(
s_wc
!=
t_wc
)
{
return
((
int
)
s_wc
)
-
((
int
)
t_wc
);
}
s
+=
s_res
;
t
+=
t_res
;
}
slen
=
se
-
s
;
tlen
=
te
-
t
;
if
(
slen
!=
tlen
)
{
int
swap
=
0
;
if
(
slen
<
tlen
)
{
slen
=
tlen
;
s
=
t
;
se
=
te
;
swap
=
-
1
;
}
/*
The following loop uses the fact that in UTF-8
all multibyte characters are greater than space,
and all multibyte head characters are greater than
space. It means if we meet a character greater
than space, it always means that the longer string
is greater. So we can reuse the same loop from the
8bit version, without having to process full multibyte
sequences.
*/
for
(
;
s
<
se
;
s
++
)
{
if
(
*
s
!=
' '
)
return
((
int
)
*
s
-
(
int
)
' '
)
^
swap
;
}
}
return
0
;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment