Commit 2c7b6214 authored by Alexander Barkov

A cleanup for MDEV-17088 Provide tools to encode/decode mysql-encoded file system names

- Load and convert the entire input file at once,
  rather than reading string-by-string using fgets().
  This change makes it possible to convert from UCS2, UTF16, UTF32 data.
- Adding the --delimiter option, to treat the specified
  characters as delimiters rather than data to convert.
  Useful in combination with `-f filename` or `-t filename`.
  The delimiter characters are not converted,
  they are copied from the input to the output as is.
- Adding diagnostics with line number and position if:
  * an illegal input byte sequence was found
  * a character cannot be converted to the target character set
parent 42a4ae54
...@@ -80,7 +80,8 @@ MYSQL_ADD_EXECUTABLE(mysqlslap mysqlslap.c) ...@@ -80,7 +80,8 @@ MYSQL_ADD_EXECUTABLE(mysqlslap mysqlslap.c)
SET_SOURCE_FILES_PROPERTIES(mysqlslap.c PROPERTIES COMPILE_FLAGS "-DTHREADS") SET_SOURCE_FILES_PROPERTIES(mysqlslap.c PROPERTIES COMPILE_FLAGS "-DTHREADS")
TARGET_LINK_LIBRARIES(mysqlslap ${CLIENT_LIB}) TARGET_LINK_LIBRARIES(mysqlslap ${CLIENT_LIB})
MYSQL_ADD_EXECUTABLE(mariadb-conv mariadb-conv.cc) MYSQL_ADD_EXECUTABLE(mariadb-conv mariadb-conv.cc
${CMAKE_SOURCE_DIR}/sql/sql_string.cc)
TARGET_LINK_LIBRARIES(mariadb-conv mysys strings) TARGET_LINK_LIBRARIES(mariadb-conv mysys strings)
# "WIN32" also covers 64 bit. "echo" is used in some files below "mysql-test/". # "WIN32" also covers 64 bit. "echo" is used in some files below "mysql-test/".
......
This diff is collapsed.
...@@ -21,6 +21,8 @@ BINARY CONVERT(a USING filename) ...@@ -21,6 +21,8 @@ BINARY CONVERT(a USING filename)
2 2
test/.frm test/.frm
test/2.frm test/2.frm
test/.frm
test/2.frm
DROP TABLE t1; DROP TABLE t1;
# bulk convert with file # bulk convert with file
# --- Start of mariadb-conv for mysql-conv-test-cp932.txt --- # --- Start of mariadb-conv for mysql-conv-test-cp932.txt ---
......
...@@ -10,26 +10,27 @@ SET NAMES cp932; ...@@ -10,26 +10,27 @@ SET NAMES cp932;
--let $MYSQLD_DATADIR= `select @@datadir` --let $MYSQLD_DATADIR= `select @@datadir`
# simple I/O # simple I/O
--exec echo "" | $MARIADB_CONV -f cp932 -t filename --exec echo "" | $MARIADB_CONV -f cp932 -t filename --delimiter="\r\n"
--exec echo "@6e2c@8a66@8cc7@6599@5eab" | $MARIADB_CONV -f filename -t cp932 --exec echo "@6e2c@8a66@8cc7@6599@5eab" | $MARIADB_CONV -f filename -t cp932 --delimiter="\r\n"
# undo query result # undo query result
--let $query_result=`SELECT CONVERT(CONVERT('' USING filename) USING binary);` --let $query_result=`SELECT CONVERT(CONVERT('' USING filename) USING binary);`
--echo $query_result --echo $query_result
--exec echo $query_result | $MARIADB_CONV -f filename -t cp932 --exec echo $query_result | $MARIADB_CONV -f filename -t cp932 --delimiter="\r\n"
--let $reverse_query_result=`SELECT CONVERT(_filename '@6e2c@8a66@8cc7@6599@5eab' USING cp932);` --let $reverse_query_result=`SELECT CONVERT(_filename '@6e2c@8a66@8cc7@6599@5eab' USING cp932);`
--echo $reverse_query_result --echo $reverse_query_result
--exec echo $reverse_query_result | $MARIADB_CONV -f cp932 -t filename --exec echo $reverse_query_result | $MARIADB_CONV -f cp932 -t filename --delimiter="\r\n"
--echo # bulk convert with pipe --echo # bulk convert with pipe
CREATE TABLE t1 (id SERIAL, a VARCHAR(64) CHARACTER SET cp932); CREATE TABLE t1 (id SERIAL, a VARCHAR(64) CHARACTER SET cp932);
INSERT INTO t1 (a) VALUES (''), ('2'); INSERT INTO t1 (a) VALUES (''), ('2');
--exec $MYSQL -Dtest --default-character-set=cp932 -e "SELECT a FROM t1 ORDER BY id" | $MARIADB_CONV -f cp932 -t filename --exec $MYSQL -Dtest --default-character-set=cp932 -e "SELECT a FROM t1 ORDER BY id" | $MARIADB_CONV -f cp932 -t filename --delimiter="\r\n"
--exec $MYSQL -Dtest --default-character-set=cp932 -e "SELECT BINARY CONVERT(a USING filename) FROM t1 ORDER BY id" --exec $MYSQL -Dtest --default-character-set=cp932 -e "SELECT BINARY CONVERT(a USING filename) FROM t1 ORDER BY id"
--exec $MYSQL -Dtest --default-character-set=cp932 --column-names=0 -e "SELECT BINARY CONVERT(a USING filename) FROM t1 ORDER BY id" | $MARIADB_CONV -f filename -t cp932 --exec $MYSQL -Dtest --default-character-set=cp932 --column-names=0 -e "SELECT BINARY CONVERT(a USING filename) FROM t1 ORDER BY id" | $MARIADB_CONV -f filename -t cp932 --delimiter="\r\n"
--exec $MYSQL -Dtest --default-character-set=cp932 --column-names=0 -e "SELECT CONCAT('test/', BINARY CONVERT(a USING filename),'.frm') FROM t1 ORDER BY id" | $REPLACE "/" "@002f" "." "@002e"| $MARIADB_CONV -f filename -t cp932 --exec $MYSQL -Dtest --default-character-set=cp932 --column-names=0 -e "SELECT CONCAT('test/', BINARY CONVERT(a USING filename),'.frm') FROM t1 ORDER BY id" | $REPLACE "/" "@002f" "." "@002e"| $MARIADB_CONV -f filename -t cp932 --delimiter="\r\n"
--exec $MYSQL -Dtest --default-character-set=cp932 --column-names=0 -e "SELECT CONCAT('test/', BINARY CONVERT(a USING filename),'.frm') FROM t1 ORDER BY id" | $MARIADB_CONV -f filename -t cp932 --delimiter="/.\r\n"
DROP TABLE t1; DROP TABLE t1;
...@@ -44,12 +45,12 @@ DROP TABLE t1; ...@@ -44,12 +45,12 @@ DROP TABLE t1;
EOF EOF
--echo # --- Start of mariadb-conv for mysql-conv-test-cp932.txt --- --echo # --- Start of mariadb-conv for mysql-conv-test-cp932.txt ---
--exec $MARIADB_CONV -f cp932 -t filename $MYSQL_TMP_DIR/mysql-conv-test-cp932.txt --exec $MARIADB_CONV -f cp932 -t filename --delimiter="\r\n" $MYSQL_TMP_DIR/mysql-conv-test-cp932.txt
--echo # --- End of mariadb-conv for mysql-conv-test-cp932.txt --- --echo # --- End of mariadb-conv for mysql-conv-test-cp932.txt ---
--copy_file $MYSQL_TMP_DIR/mysql-conv-test-cp932.txt $MYSQL_TMP_DIR/mysql-conv-test-cp932-2.txt --copy_file $MYSQL_TMP_DIR/mysql-conv-test-cp932.txt $MYSQL_TMP_DIR/mysql-conv-test-cp932-2.txt
--echo # --- Start of mariadb-conv for mysql-conv-test-cp932.txt and mysql-conv-test-cp932-2.txt --- --echo # --- Start of mariadb-conv for mysql-conv-test-cp932.txt and mysql-conv-test-cp932-2.txt ---
--exec $MARIADB_CONV -f cp932 -t filename $MYSQL_TMP_DIR/mysql-conv-test-cp932.txt $MYSQL_TMP_DIR/mysql-conv-test-cp932-2.txt --exec $MARIADB_CONV -f cp932 -t filename --delimiter="\r\n" $MYSQL_TMP_DIR/mysql-conv-test-cp932.txt $MYSQL_TMP_DIR/mysql-conv-test-cp932-2.txt
--echo # --- Start of mariadb-conv for mysql-conv-test-cp932.txt and mysql-conv-test-cp932-2.txt --- --echo # --- Start of mariadb-conv for mysql-conv-test-cp932.txt and mysql-conv-test-cp932-2.txt ---
--remove_file $MYSQL_TMP_DIR/mysql-conv-test-cp932.txt --remove_file $MYSQL_TMP_DIR/mysql-conv-test-cp932.txt
......
#
# MDEV-17088 Provide tools to encode/decode mysql-encoded file system names
#
SET NAMES utf8;
# Bad delimiter
--delimiter cannot be used with utf16 to utf8 conversion
# Bad delimiter
--delimiter cannot be used with utf8 to utf16 conversion
# Start of file01.utf16.txt
aaa
xxxяяяxxx
bbb
# End of file01.utf16.txt
-- source include/have_utf16.inc
-- source include/not_embedded.inc
--echo #
--echo # MDEV-17088 Provide tools to encode/decode mysql-encoded file system names
--echo #
--character_set utf8
SET NAMES utf8;
--echo # Bad delimiter
--error 1
--exec $MARIADB_CONV -f utf16 -t utf8 --delimiter="\r\n" $MYSQL_TEST_DIR/std_data/mariadb-conv/file01.utf16.txt 2>&1
--echo # Bad delimiter
--error 1
--exec $MARIADB_CONV -f utf8 -t utf16 --delimiter="\r\n" $MYSQL_TEST_DIR/std_data/mariadb-conv/file01.utf8.txt 2>&1
--echo # Start of file01.utf16.txt
--exec $MARIADB_CONV -f utf16 -t utf8 $MYSQL_TEST_DIR/std_data/mariadb-conv/file01.utf16.txt 2>&1
--echo # End of file01.utf16.txt
...@@ -21,6 +21,8 @@ BINARY CONVERT(a USING filename) ...@@ -21,6 +21,8 @@ BINARY CONVERT(a USING filename)
測試資料2 測試資料2
test/測試資料.frm test/測試資料.frm
test/測試資料2.frm test/測試資料2.frm
test/測試資料.frm
test/測試資料2.frm
DROP TABLE t1; DROP TABLE t1;
# bulk convert with file # bulk convert with file
# --- Start of mariadb-conv for mysql-conv-test-utf8.txt --- # --- Start of mariadb-conv for mysql-conv-test-utf8.txt ---
......
...@@ -10,26 +10,27 @@ SET NAMES utf8; ...@@ -10,26 +10,27 @@ SET NAMES utf8;
--let $MYSQLD_DATADIR= `select @@datadir` --let $MYSQLD_DATADIR= `select @@datadir`
# simple I/O # simple I/O
--exec echo "測試資料" | $MARIADB_CONV -f utf8 -t filename --exec echo "測試資料" | $MARIADB_CONV -f utf8 -t filename --delimiter="\r\n"
--exec echo "@6e2c@8a66@8cc7@6599@5eab" | $MARIADB_CONV -f filename -t utf8 --exec echo "@6e2c@8a66@8cc7@6599@5eab" | $MARIADB_CONV -f filename -t utf8 --delimiter="\r\n"
# undo query result # undo query result
--let $query_result=`SELECT CONVERT(CONVERT('測試資料' USING filename) USING binary);` --let $query_result=`SELECT CONVERT(CONVERT('測試資料' USING filename) USING binary);`
--echo $query_result --echo $query_result
--exec echo $query_result | $MARIADB_CONV -f filename -t utf8 --exec echo $query_result | $MARIADB_CONV -f filename -t utf8 --delimiter="\r\n"
--let $reverse_query_result=`SELECT CONVERT(_filename '@6e2c@8a66@8cc7@6599@5eab' USING utf8);` --let $reverse_query_result=`SELECT CONVERT(_filename '@6e2c@8a66@8cc7@6599@5eab' USING utf8);`
--echo $reverse_query_result --echo $reverse_query_result
--exec echo $reverse_query_result | $MARIADB_CONV -f utf8 -t filename --exec echo $reverse_query_result | $MARIADB_CONV -f utf8 -t filename --delimiter="\r\n"
--echo # bulk convert with pipe --echo # bulk convert with pipe
CREATE TABLE t1 (id SERIAL, a VARCHAR(64) CHARACTER SET utf8); CREATE TABLE t1 (id SERIAL, a VARCHAR(64) CHARACTER SET utf8);
INSERT INTO t1 (a) VALUES ('測試資料'), ('測試資料2'); INSERT INTO t1 (a) VALUES ('測試資料'), ('測試資料2');
--exec $MYSQL -Dtest --default-character-set=utf8 -e "SELECT a FROM t1 ORDER BY id" | $MARIADB_CONV -f utf8 -t filename --exec $MYSQL -Dtest --default-character-set=utf8 -e "SELECT a FROM t1 ORDER BY id" | $MARIADB_CONV -f utf8 -t filename --delimiter="\r\n"
--exec $MYSQL -Dtest --default-character-set=utf8 -e "SELECT BINARY CONVERT(a USING filename) FROM t1 ORDER BY id" --exec $MYSQL -Dtest --default-character-set=utf8 -e "SELECT BINARY CONVERT(a USING filename) FROM t1 ORDER BY id"
--exec $MYSQL -Dtest --default-character-set=utf8 --column-names=0 -e "SELECT BINARY CONVERT(a USING filename) FROM t1 ORDER BY id" | $MARIADB_CONV -f filename -t utf8 --exec $MYSQL -Dtest --default-character-set=utf8 --column-names=0 -e "SELECT BINARY CONVERT(a USING filename) FROM t1 ORDER BY id" | $MARIADB_CONV -f filename -t utf8 --delimiter="\r\n"
--exec $MYSQL -Dtest --default-character-set=utf8 --column-names=0 -e "SELECT CONCAT('test/', BINARY CONVERT(a USING filename),'.frm') FROM t1 ORDER BY id" | $REPLACE "/" "@002f" "." "@002e"| $MARIADB_CONV -f filename -t utf8 --exec $MYSQL -Dtest --default-character-set=utf8 --column-names=0 -e "SELECT CONCAT('test/', BINARY CONVERT(a USING filename),'.frm') FROM t1 ORDER BY id" | $REPLACE "/" "@002f" "." "@002e"| $MARIADB_CONV -f filename -t utf8 --delimiter="\r\n"
--exec $MYSQL -Dtest --default-character-set=utf8 --column-names=0 -e "SELECT CONCAT('test/', BINARY CONVERT(a USING filename),'.frm') FROM t1 ORDER BY id" | $MARIADB_CONV -f filename -t utf8 --delimiter="/.\r\n"
DROP TABLE t1; DROP TABLE t1;
...@@ -44,12 +45,12 @@ DROP TABLE t1; ...@@ -44,12 +45,12 @@ DROP TABLE t1;
EOF EOF
--echo # --- Start of mariadb-conv for mysql-conv-test-utf8.txt --- --echo # --- Start of mariadb-conv for mysql-conv-test-utf8.txt ---
--exec $MARIADB_CONV -f utf8 -t filename $MYSQL_TMP_DIR/mysql-conv-test-utf8.txt --exec $MARIADB_CONV -f utf8 -t filename --delimiter="\r\n" $MYSQL_TMP_DIR/mysql-conv-test-utf8.txt
--echo # --- End of mariadb-conv for mysql-conv-test-utf8.txt --- --echo # --- End of mariadb-conv for mysql-conv-test-utf8.txt ---
--copy_file $MYSQL_TMP_DIR/mysql-conv-test-utf8.txt $MYSQL_TMP_DIR/mysql-conv-test-utf8-2.txt --copy_file $MYSQL_TMP_DIR/mysql-conv-test-utf8.txt $MYSQL_TMP_DIR/mysql-conv-test-utf8-2.txt
--echo # --- Start of mariadb-conv for mysql-conv-test-utf8.txt and mysql-conv-test-utf8-2.txt --- --echo # --- Start of mariadb-conv for mysql-conv-test-utf8.txt and mysql-conv-test-utf8-2.txt ---
--exec $MARIADB_CONV -f utf8 -t filename $MYSQL_TMP_DIR/mysql-conv-test-utf8.txt $MYSQL_TMP_DIR/mysql-conv-test-utf8-2.txt --exec $MARIADB_CONV -f utf8 -t filename --delimiter="\r\n" $MYSQL_TMP_DIR/mysql-conv-test-utf8.txt $MYSQL_TMP_DIR/mysql-conv-test-utf8-2.txt
--echo # --- Start of mariadb-conv for mysql-conv-test-utf8.txt and mysql-conv-test-utf8-2.txt --- --echo # --- Start of mariadb-conv for mysql-conv-test-utf8.txt and mysql-conv-test-utf8-2.txt ---
--remove_file $MYSQL_TMP_DIR/mysql-conv-test-utf8.txt --remove_file $MYSQL_TMP_DIR/mysql-conv-test-utf8.txt
......
...@@ -11,3 +11,15 @@ mariadb-conv: unknown option '-r' ...@@ -11,3 +11,15 @@ mariadb-conv: unknown option '-r'
Character set unknown-cs is not supported Character set unknown-cs is not supported
# unknown "from" character set # unknown "from" character set
Character set unknown-cs is not supported Character set unknown-cs is not supported
# Bad delimiter
Bad --delimiter value
# Conversion error
Conversion from utf8 to latin1 failed at position 7
aaa
xxx???xxx
bbb
# Bad input character
Illegal utf8 byte sequence at position 7
aaa
xxx???xxx
bbb
...@@ -6,8 +6,8 @@ ...@@ -6,8 +6,8 @@
--echo # default encoding --echo # default encoding
--exec echo "t1" | $MARIADB_CONV --exec echo "t1" | $MARIADB_CONV
--exec echo "t1" | $MARIADB_CONV -f filename --exec echo "t1" | $MARIADB_CONV -f filename --delimiter="\r\n"
--exec echo "t1" | $MARIADB_CONV -t filename --exec echo "t1" | $MARIADB_CONV -t filename --delimiter="\r\n"
--echo # invalid option --echo # invalid option
--replace_regex /.*mariadb-conv.*: unknown/mariadb-conv: unknown/ --replace_regex /.*mariadb-conv.*: unknown/mariadb-conv: unknown/
...@@ -23,3 +23,17 @@ ...@@ -23,3 +23,17 @@
--replace_regex /.*mariadb-conv.*: unknown/mariadb-conv: unknown/ --replace_regex /.*mariadb-conv.*: unknown/mariadb-conv: unknown/
--error 1 --error 1
--exec echo "t1" | $MARIADB_CONV -f unknown-cs -t latin1 2>&1 > /dev/null --exec echo "t1" | $MARIADB_CONV -f unknown-cs -t latin1 2>&1 > /dev/null
--echo # Bad delimiter
--error 1
--exec echo "t1" | $MARIADB_CONV --delimiter="\x" 2>&1 > /dev/null
--echo # Conversion error
--error 1
--exec $MARIADB_CONV -f utf8 -t latin1 < $MYSQL_TEST_DIR/std_data/mariadb-conv/file01.utf8.txt 2>&1
--exec $MARIADB_CONV -f utf8 -t latin1 -c < $MYSQL_TEST_DIR/std_data/mariadb-conv/file01.utf8.txt 2>&1
--echo # Bad input character
--error 1
--exec $MARIADB_CONV -f utf8 -t latin1 < $MYSQL_TEST_DIR/std_data/mariadb-conv/file02.latin1.txt 2>&1
--exec $MARIADB_CONV -f utf8 -t latin1 -c < $MYSQL_TEST_DIR/std_data/mariadb-conv/file02.latin1.txt 2>&1
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment