Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
mariadb
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
mariadb
Commits
65294b3b
Commit
65294b3b
authored
Mar 29, 2005
by
kent@mysql.com
Browse files
Options
Browse Files
Download
Plain Diff
Merge kboortz@bk-internal.mysql.com:/home/bk/mysql-4.1
into mysql.com:/Users/kent/mysql/bk/mysql-4.1-perl
parents
0544c68e
396bbdbe
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
293 additions
and
14 deletions
+293
-14
mysys/charset-def.c
mysys/charset-def.c
+6
-0
mysys/default.c
mysys/default.c
+121
-14
strings/ctype-utf8.c
strings/ctype-utf8.c
+166
-0
No files found.
mysys/charset-def.c
View file @
65294b3b
...
...
@@ -62,6 +62,9 @@ extern CHARSET_INFO my_charset_utf8_slovak_uca_ci;
extern
CHARSET_INFO
my_charset_utf8_spanish2_uca_ci
;
extern
CHARSET_INFO
my_charset_utf8_roman_uca_ci
;
extern
CHARSET_INFO
my_charset_utf8_persian_uca_ci
;
#ifdef HAVE_CYBOZU_COLLATION
extern
CHARSET_INFO
my_charset_utf8_general_cs
;
#endif
#endif
#endif
/* HAVE_UCA_COLLATIONS */
...
...
@@ -146,6 +149,9 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
#ifdef HAVE_CHARSET_utf8
add_compiled_collation
(
&
my_charset_utf8_general_ci
);
add_compiled_collation
(
&
my_charset_utf8_bin
);
#ifdef HAVE_CYBOZU_COLLATION
add_compiled_collation
(
&
my_charset_utf8_general_cs
);
#endif
#ifdef HAVE_UCA_COLLATIONS
add_compiled_collation
(
&
my_charset_utf8_general_uca_ci
);
add_compiled_collation
(
&
my_charset_utf8_icelandic_uca_ci
);
...
...
mysys/default.c
View file @
65294b3b
...
...
@@ -73,7 +73,7 @@ static int search_default_file(DYNAMIC_ARRAY *args,MEM_ROOT *alloc,
static
int
search_default_file_with_ext
(
DYNAMIC_ARRAY
*
args
,
MEM_ROOT
*
alloc
,
const
char
*
dir
,
const
char
*
ext
,
const
char
*
config_file
,
TYPELIB
*
group
);
TYPELIB
*
group
,
int
recursion_level
);
static
char
*
remove_end_comment
(
char
*
ptr
);
...
...
@@ -194,7 +194,7 @@ int load_defaults(const char *conf_file, const char **groups,
{
if
((
error
=
search_default_file_with_ext
(
&
args
,
&
alloc
,
""
,
""
,
forced_default_file
,
&
group
))
<
0
)
&
group
,
0
))
<
0
)
goto
err
;
if
(
error
>
0
)
{
...
...
@@ -311,7 +311,7 @@ static int search_default_file(DYNAMIC_ARRAY *args, MEM_ROOT *alloc,
{
int
error
;
if
((
error
=
search_default_file_with_ext
(
args
,
alloc
,
dir
,
*
ext
,
config_file
,
group
))
<
0
)
config_file
,
group
,
0
))
<
0
)
return
error
;
}
return
0
;
...
...
@@ -326,9 +326,11 @@ static int search_default_file(DYNAMIC_ARRAY *args, MEM_ROOT *alloc,
args Store pointer to found options here
alloc Allocate strings in this object
dir directory to read
config_file Name of configuration file
ext Extension for configuration file
config_file Name of configuration file
group groups to read
recursion_level the level of recursion, got while processing
"!include" or "!includedir"
RETURN
0 Success
...
...
@@ -340,12 +342,18 @@ static int search_default_file(DYNAMIC_ARRAY *args, MEM_ROOT *alloc,
static
int
search_default_file_with_ext
(
DYNAMIC_ARRAY
*
args
,
MEM_ROOT
*
alloc
,
const
char
*
dir
,
const
char
*
ext
,
const
char
*
config_file
,
TYPELIB
*
group
)
TYPELIB
*
group
,
int
recursion_level
)
{
char
name
[
FN_REFLEN
+
10
],
buff
[
4096
],
*
ptr
,
*
end
,
*
value
,
*
tmp
;
char
name
[
FN_REFLEN
+
10
],
buff
[
4096
],
*
ptr
,
*
end
,
*
value
,
*
tmp
,
**
tmp_ext
;
static
const
char
includedir_keyword
[]
=
"includedir"
;
static
const
char
include_keyword
[]
=
"include"
;
const
int
max_recursion_level
=
10
;
FILE
*
fp
;
uint
line
=
0
;
my_bool
read_values
=
0
,
found_group
=
0
;
uint
line
=
0
;
my_bool
read_values
=
0
,
found_group
=
0
;
uint
i
;
MY_DIR
*
search_dir
;
FILEINFO
*
search_file
;
if
((
dir
?
strlen
(
dir
)
:
0
)
+
strlen
(
config_file
)
>=
FN_REFLEN
-
3
)
return
0
;
/* Ignore wrong paths */
...
...
@@ -374,22 +382,121 @@ static int search_default_file_with_ext(DYNAMIC_ARRAY *args, MEM_ROOT *alloc,
if
((
stat_info
.
st_mode
&
S_IWOTH
)
&&
(
stat_info
.
st_mode
&
S_IFMT
)
==
S_IFREG
)
{
fprintf
(
stderr
,
"warning: World-writ
e
able config file %s is ignored
\n
"
,
fprintf
(
stderr
,
"warning: World-writable config file %s is ignored
\n
"
,
name
);
return
0
;
}
}
#endif
if
(
!
(
fp
=
my_fopen
(
fn_format
(
name
,
name
,
""
,
""
,
4
),
O_RDONLY
,
MYF
(
0
))))
if
(
!
(
fp
=
my_fopen
(
fn_format
(
name
,
name
,
""
,
""
,
4
),
O_RDONLY
,
MYF
(
0
))))
return
0
;
/* Ignore wrong files */
while
(
fgets
(
buff
,
sizeof
(
buff
)
-
1
,
fp
))
while
(
fgets
(
buff
,
sizeof
(
buff
)
-
1
,
fp
))
{
line
++
;
/* Ignore comment and empty lines */
for
(
ptr
=
buff
;
my_isspace
(
&
my_charset_latin1
,
*
ptr
)
;
ptr
++
)
;
for
(
ptr
=
buff
;
my_isspace
(
&
my_charset_latin1
,
*
ptr
);
ptr
++
)
{}
if
(
*
ptr
==
'#'
||
*
ptr
==
';'
||
!*
ptr
)
continue
;
/* Configuration File Directives */
if
((
*
ptr
==
'!'
)
&&
(
recursion_level
<
max_recursion_level
))
{
/* skip over `!' and following whitespace */
for
(
++
ptr
;
my_isspace
(
&
my_charset_latin1
,
ptr
[
0
]);
ptr
++
)
{}
if
((
!
strncmp
(
ptr
,
includedir_keyword
,
sizeof
(
includedir_keyword
)
-
1
))
&&
my_isspace
(
&
my_charset_latin1
,
ptr
[
sizeof
(
includedir_keyword
)
-
1
]))
{
/* skip over "includedir" and following whitespace */
for
(
ptr
+=
sizeof
(
includedir_keyword
)
-
1
;
my_isspace
(
&
my_charset_latin1
,
ptr
[
0
]);
ptr
++
)
{}
/* trim trailing whitespace from directory name */
end
=
ptr
+
strlen
(
ptr
)
-
1
;
/*
This would work fine even if no whitespaces are met
since fgets() stores the newline character in the buffer
*/
for
(;
my_isspace
(
&
my_charset_latin1
,
*
(
end
-
1
));
end
--
)
{}
end
[
0
]
=
0
;
/* print error msg if there is nothing after !includedir directive */
if
(
end
==
ptr
)
{
fprintf
(
stderr
,
"error: Wrong !includedir directive in config "
"file: %s at line %d
\n
"
,
name
,
line
);
goto
err
;
}
if
(
!
(
search_dir
=
my_dir
(
ptr
,
MYF
(
MY_WME
))))
goto
err
;
for
(
i
=
0
;
i
<
(
uint
)
search_dir
->
number_off_files
;
i
++
)
{
search_file
=
search_dir
->
dir_entry
+
i
;
ext
=
fn_ext
(
search_file
->
name
);
/* check extension */
for
(
tmp_ext
=
(
char
**
)
f_extensions
;
*
tmp_ext
;
*
tmp_ext
++
)
{
if
(
!
strcmp
(
ext
,
*
tmp_ext
))
break
;
}
if
(
*
tmp_ext
)
{
if
(
!
(
tmp
=
alloc_root
(
alloc
,
2
+
strlen
(
search_file
->
name
)
+
strlen
(
ptr
))))
goto
err
;
fn_format
(
tmp
,
search_file
->
name
,
ptr
,
""
,
MY_UNPACK_FILENAME
|
MY_SAFE_PATH
);
search_default_file_with_ext
(
args
,
alloc
,
""
,
""
,
tmp
,
group
,
recursion_level
+
1
);
}
}
my_dirend
(
search_dir
);
}
else
if
((
!
strncmp
(
ptr
,
include_keyword
,
sizeof
(
include_keyword
)
-
1
))
&&
my_isspace
(
&
my_charset_latin1
,
ptr
[
sizeof
(
include_keyword
)
-
1
]))
{
/* skip over `include' and following whitespace */
for
(
ptr
+=
sizeof
(
include_keyword
)
-
1
;
my_isspace
(
&
my_charset_latin1
,
ptr
[
0
]);
ptr
++
)
{}
/* trim trailing whitespace from filename */
end
=
ptr
+
strlen
(
ptr
)
-
1
;
for
(;
my_isspace
(
&
my_charset_latin1
,
*
(
end
-
1
))
;
end
--
)
{}
end
[
0
]
=
0
;
if
(
end
==
ptr
)
{
fprintf
(
stderr
,
"error: Wrong !include directive in config "
"file: %s at line %d
\n
"
,
name
,
line
);
goto
err
;
}
search_default_file_with_ext
(
args
,
alloc
,
""
,
""
,
ptr
,
group
,
recursion_level
+
1
);
}
continue
;
}
if
(
*
ptr
==
'['
)
/* Group name */
{
found_group
=
1
;
...
...
strings/ctype-utf8.c
View file @
65294b3b
...
...
@@ -2380,6 +2380,172 @@ CHARSET_INFO my_charset_utf8_bin=
&
my_collation_mb_bin_handler
};
#ifdef HAVE_CYBOZU_COLLATION
/*
* These functions basically do the same as their originals, except
* that they return 0 only when the two Unicode strings being compared
* are strictly the same in a case-sensitive way. See the "save_diff"
* local variable for what they actually do.
*/
/*
  Compare two UTF-8 strings for the "utf8_general_cs" collation.

  SYNOPSIS
    my_strnncoll_utf8_cs()
    cs            Character set handle, passed through to my_utf8_uni()
    s, slen       First string and its length in bytes
    t, tlen       Second string and its length in bytes
    t_is_prefix   If non-zero, the result after the common part is
                  (t - end of t) instead of the length difference

  NOTES
    Characters are decoded one at a time and compared by the "sort"
    weight found in uni_plane[] (or by the raw code when the page has
    no table).  The first non-zero difference of the raw, unfolded
    code points is remembered and is used as the result only when the
    weights of both strings are equal up to their ends and the strings
    have the same remaining length.

    NOTE(review): when t_is_prefix is set the remembered raw difference
    is not consulted - confirm this is intended.

  RETURN
    <0, 0 or >0 in the usual comparison sense.  If either string holds
    a sequence my_utf8_uni() cannot decode, the difference of the
    current bytes is returned.
*/
static int my_strnncoll_utf8_cs(CHARSET_INFO *cs,
                                const uchar *s, uint slen,
                                const uchar *t, uint tlen,
                                my_bool t_is_prefix)
{
  const uchar *s_end= s + slen;
  const uchar *t_end= t + tlen;
  my_wc_t s_code, t_code;
  int s_bytes, t_bytes;
  int first_raw_diff= 0;
  int remaining_diff;

  for ( ; s < s_end && t < t_end; s+= s_bytes, t+= t_bytes)
  {
    int page;

    s_bytes= my_utf8_uni(cs, &s_code, s, s_end);
    t_bytes= my_utf8_uni(cs, &t_code, t, t_end);

    /* Incorrect string, compare by char value */
    if (s_bytes <= 0 || t_bytes <= 0)
      return (int) s[0] - (int) t[0];

    /* Remember the first difference of the unfolded code points */
    if (!first_raw_diff)
      first_raw_diff= (int) s_code - (int) t_code;

    /* Replace each code point by its sort weight, if the page has one */
    page= (s_code >> 8) & 0xFF;
    if (uni_plane[page])
      s_code= uni_plane[page][s_code & 0xFF].sort;

    page= (t_code >> 8) & 0xFF;
    if (uni_plane[page])
      t_code= uni_plane[page][t_code & 0xFF].sort;

    if (s_code != t_code)
      return (int) s_code - (int) t_code;
  }

  if (t_is_prefix)
    return (int) (t - t_end);

  remaining_diff= (int) ((s_end - s) - (t_end - t));
  return remaining_diff ? remaining_diff : first_raw_diff;
}
/*
  Compare two UTF-8 strings for the "utf8_general_cs" collation,
  ignoring trailing spaces.

  SYNOPSIS
    my_strnncollsp_utf8_cs()
    cs        Character set handle, passed through to my_utf8_uni()
    s, slen   First string and its length in bytes
    t, tlen   Second string and its length in bytes

  NOTES
    The common part is compared character by character using the
    "sort" weight from uni_plane[] (or the raw code when the page has
    no table).  The first non-zero difference of the raw, unfolded code
    points is remembered in save_diff and returned when the strings are
    otherwise equal.

    If one string is longer, its tail is compared against spaces: the
    first tail byte that is not a space decides the result.

  RETURN
    <0, 0 or >0 in the usual comparison sense.  If either string holds
    a sequence my_utf8_uni() cannot decode, the difference of the
    current bytes is returned.
*/
static int my_strnncollsp_utf8_cs(CHARSET_INFO *cs,
                                  const uchar *s, uint slen,
                                  const uchar *t, uint tlen)
{
  int s_res, t_res;
  my_wc_t s_wc, t_wc;
  const uchar *se= s + slen;
  const uchar *te= t + tlen;
  int save_diff= 0;

  while (s < se && t < te)
  {
    int plane;

    s_res= my_utf8_uni(cs, &s_wc, s, se);
    t_res= my_utf8_uni(cs, &t_wc, t, te);

    if (s_res <= 0 || t_res <= 0)
    {
      /* Incorrect string, compare by char value */
      return ((int) s[0] - (int) t[0]);
    }

    /* Remember the first difference of the unfolded code points */
    if (save_diff == 0)
    {
      save_diff= ((int) s_wc) - ((int) t_wc);
    }

    plane= (s_wc >> 8) & 0xFF;
    s_wc= uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc;
    plane= (t_wc >> 8) & 0xFF;
    t_wc= uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc;

    if (s_wc != t_wc)
    {
      return ((int) s_wc) - ((int) t_wc);
    }

    s+= s_res;
    t+= t_res;
  }

  slen= se - s;
  tlen= te - t;

  if (slen != tlen)
  {
    int swap= 0;                        /* 1 when the tail belongs to t */

    if (slen < tlen)
    {
      slen= tlen;
      s= t;
      se= te;
      swap= 1;
    }
    /*
      This following loop uses the fact that in UTF-8
      all multibyte characters are greater than space,
      and all multibyte head characters are greater than
      space. It means if we meet a character greater
      than space, it always means that the longer string
      is greater. So we can reuse the same loop from the
      8bit version, without having to process full multibyte
      sequences.
    */
    for ( ; s < se; s++)
    {
      if (*s != ' ')
      {
        /*
          Negate explicitly when the tail came from t.  The previous
          "diff ^ -1" form evaluates to ~diff, which is 0 for
          diff == -1 (byte 0x1F) and so wrongly reported equality.
        */
        int tail_diff= (int) *s - (int) ' ';
        return swap ? -tail_diff : tail_diff;
      }
    }
  }
  return save_diff;
}
/*
  Collation handler table for "utf8_general_cs".  The entries are
  positional; the two *_utf8_cs comparison functions defined above are
  plugged in, the remaining slots reuse existing handlers.
*/
static MY_COLLATION_HANDLER my_collation_cs_handler =
{
    NULL,                       /* init */
    my_strnncoll_utf8_cs,
    my_strnncollsp_utf8_cs,
    my_strnxfrm_utf8,
    my_like_range_simple,
    my_wildcmp_mb,
    my_strcasecmp_utf8,
    my_instr_mb,
    my_hash_sort_utf8
};
/*
  Charset descriptor for the "utf8_general_cs" collation.  It uses the
  common utf8 charset handler together with my_collation_cs_handler.
*/
CHARSET_INFO my_charset_utf8_general_cs =
{
    254, 0, 0,                          /* number           */
    MY_CS_COMPILED | MY_CS_UNICODE,     /* state            */
    "utf8",                             /* cs name          */
    "utf8_general_cs",                  /* name             */
    "",                                 /* comment          */
    NULL,                               /* tailoring        */
    ctype_utf8,                         /* ctype            */
    to_lower_utf8,                      /* to_lower         */
    to_upper_utf8,                      /* to_upper         */
    to_upper_utf8,                      /* sort_order       */
    NULL,                               /* contractions     */
    NULL,                               /* sort_order_big   */
    NULL,                               /* tab_to_uni       */
    NULL,                               /* tab_from_uni     */
    NULL,                               /* state_map        */
    NULL,                               /* ident_map        */
    1,                                  /* strxfrm_multiply */
    1,                                  /* mbminlen         */
    3,                                  /* mbmaxlen         */
    0,                                  /* min_sort_char    */
    255,                                /* max_sort_char    */
    &my_charset_utf8_handler,
    &my_collation_cs_handler
};
#endif
/* Cybozu Hack */
#ifdef MY_TEST_UTF8
#include <stdio.h>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment