Commit 5804bb4e authored by Vladislav Vaintroub

MDEV-19750 mysql command wrong encoding

Restore the detection of default charset in command line utilities.
It worked up to 10.1, but was broken by Connector/C.

Moved the code for detecting the default charset from sql-common/client.c
to mysys, and made the command line utilities use this code if no charset
was specified on the command line.
parent 81f60e8a
......@@ -40,6 +40,7 @@
#include "my_readline.h"
#include <signal.h>
#include <violite.h>
#include <my_sys.h>
#if defined(USE_LIBEDIT_INTERFACE) && defined(HAVE_LOCALE_H)
#include <locale.h>
......@@ -4701,7 +4702,8 @@ sql_real_connect(char *host,char *database,char *user,char *password,
select_limit,max_join_size);
mysql_options(&mysql, MYSQL_INIT_COMMAND, init_command);
}
if (!strcmp(default_charset,MYSQL_AUTODETECT_CHARSET_NAME))
default_charset= (char *)my_default_csname();
mysql_options(&mysql, MYSQL_SET_CHARSET_NAME, default_charset);
if (!do_connect(&mysql, host, user, password, database,
......
......@@ -26,6 +26,7 @@
#include <welcome_copyright_notice.h>
#include <my_rnd.h>
#include <password.h>
#include <my_sys.h>
#define ADMIN_VERSION "9.1"
#define MAX_MYSQL_VAR 512
......@@ -371,6 +372,8 @@ int main(int argc,char *argv[])
if (shared_memory_base_name)
mysql_options(&mysql,MYSQL_SHARED_MEMORY_BASE_NAME,shared_memory_base_name);
#endif
if (!strcmp(default_charset,MYSQL_AUTODETECT_CHARSET_NAME))
default_charset= (char *)my_default_csname();
mysql_options(&mysql, MYSQL_SET_CHARSET_NAME, default_charset);
error_flags= (myf)(opt_nobeep ? 0 : ME_BELL);
......
......@@ -440,8 +440,10 @@ static int get_options(int *argc, char ***argv)
else
default_charset= (char*) MYSQL_AUTODETECT_CHARSET_NAME;
}
if (strcmp(default_charset, MYSQL_AUTODETECT_CHARSET_NAME) &&
!get_charset_by_csname(default_charset, MY_CS_PRIMARY, MYF(MY_WME)))
if (!strcmp(default_charset, MYSQL_AUTODETECT_CHARSET_NAME))
default_charset= (char *)my_default_csname();
if (!get_charset_by_csname(default_charset, MY_CS_PRIMARY, MYF(MY_WME)))
{
printf("Unsupported character set: %s\n", default_charset);
DBUG_RETURN(1);
......
......@@ -30,6 +30,8 @@
#define IMPORT_VERSION "3.7"
#include "client_priv.h"
#include <my_sys.h>
#include "mysql_version.h"
#include <welcome_copyright_notice.h> /* ORACLE_WELCOME_COPYRIGHT_NOTICE */
......@@ -472,8 +474,9 @@ static MYSQL *db_connect(char *host, char *database,
if (opt_default_auth && *opt_default_auth)
mysql_options(mysql, MYSQL_DEFAULT_AUTH, opt_default_auth);
mysql_options(mysql, MYSQL_SET_CHARSET_NAME, default_charset);
/*
  Replace the autodetect placeholder with the character set detected from
  the OS, then hand the effective name to the client library. A character
  set requested explicitly by the user must be passed through unchanged,
  so the option is set from default_charset, not from my_default_csname().
*/
if (!strcmp(default_charset,MYSQL_AUTODETECT_CHARSET_NAME))
  default_charset= (char *)my_default_csname();
mysql_options(mysql, MYSQL_SET_CHARSET_NAME, default_charset);
mysql_options(mysql, MYSQL_OPT_CONNECT_ATTR_RESET, 0);
mysql_options4(mysql, MYSQL_OPT_CONNECT_ATTR_ADD,
"program_name", "mysqlimport");
......
......@@ -135,6 +135,8 @@ int main(int argc, char **argv)
if (shared_memory_base_name)
mysql_options(&mysql,MYSQL_SHARED_MEMORY_BASE_NAME,shared_memory_base_name);
#endif
if (!strcmp(default_charset,MYSQL_AUTODETECT_CHARSET_NAME))
default_charset= (char *)my_default_csname();
mysql_options(&mysql, MYSQL_SET_CHARSET_NAME, default_charset);
if (opt_plugin_dir && *opt_plugin_dir)
......
......@@ -1043,6 +1043,7 @@ extern char *get_tty_password(const char *opt_message);
/* File system character set */
extern CHARSET_INFO *fs_character_set(void);
#endif
extern const char *my_default_csname(void);
extern size_t escape_quotes_for_mysql(CHARSET_INFO *charset_info,
char *to, size_t to_length,
const char *from, size_t length);
......
@@character_set_client
cp1257
--source include/windows.inc
--exec chcp 1257 > NUL && $MYSQL --default-character-set=auto -e "select @@character_set_client"
......@@ -20,7 +20,12 @@
#include <m_string.h>
#include <my_dir.h>
#include <my_xml.h>
#ifdef HAVE_LANGINFO_H
#include <langinfo.h>
#endif
#ifdef HAVE_LOCALE_H
#include <locale.h>
#endif
/*
The code below implements this functionality:
......@@ -1216,3 +1221,214 @@ size_t escape_quotes_for_mysql(CHARSET_INFO *charset_info,
*to= 0;
return overflow ? (ulong)~0 : (ulong) (to - to_start);
}
/*
  How well an OS character set corresponds to the MySQL character set
  it is mapped to.
*/
typedef enum my_cs_match_type_enum
{
  my_cs_exact,   /* MySQL and OS charsets are fully compatible */
  my_cs_approx,  /* MySQL charset is very close to OS charset */
  my_cs_unsupp   /* MySQL knows this charset, but it is not supported
                    as client character set */
} my_cs_match_type;
/* One row of the table mapping OS character set names to MySQL names. */
typedef struct str2str_st
{
  const char *os_name;     /* name as reported by the operating system */
  const char *my_name;     /* MySQL character set name it maps to */
  my_cs_match_type param;  /* how exact the correspondence is */
} MY_CSET_OS_NAME;
/*
  Mapping from OS character set names to MySQL character set names.

  On Windows the OS name is "cp" followed by the code page number; on other
  systems it is a locale codeset name. Lookups compare os_name
  case-insensitively (see my_os_charset_to_mysql_charset()). The table is
  terminated by an entry whose os_name is NULL.

  Entries inside the UNCOMMENT_THIS_WHEN_..._IS_DONE conditionals are
  compiled only if the corresponding macro is defined.
*/
static const MY_CSET_OS_NAME charsets[] =
{
#ifdef _WIN32
{"cp437", "cp850", my_cs_approx},
{"cp850", "cp850", my_cs_exact},
{"cp852", "cp852", my_cs_exact},
{"cp858", "cp850", my_cs_approx},
{"cp866", "cp866", my_cs_exact},
{"cp874", "tis620", my_cs_approx},
{"cp932", "cp932", my_cs_exact},
{"cp936", "gbk", my_cs_approx},
{"cp949", "euckr", my_cs_approx},
{"cp950", "big5", my_cs_exact},
{"cp1200", "utf16le", my_cs_unsupp},
{"cp1201", "utf16", my_cs_unsupp},
{"cp1250", "cp1250", my_cs_exact},
{"cp1251", "cp1251", my_cs_exact},
{"cp1252", "latin1", my_cs_exact},
{"cp1253", "greek", my_cs_exact},
{"cp1254", "latin5", my_cs_exact},
{"cp1255", "hebrew", my_cs_approx},
{"cp1256", "cp1256", my_cs_exact},
{"cp1257", "cp1257", my_cs_exact},
{"cp10000", "macroman", my_cs_exact},
{"cp10001", "sjis", my_cs_approx},
{"cp10002", "big5", my_cs_approx},
{"cp10008", "gb2312", my_cs_approx},
{"cp10021", "tis620", my_cs_approx},
{"cp10029", "macce", my_cs_exact},
{"cp12001", "utf32", my_cs_unsupp},
{"cp20107", "swe7", my_cs_exact},
{"cp20127", "latin1", my_cs_approx},
{"cp20866", "koi8r", my_cs_exact},
{"cp20932", "ujis", my_cs_exact},
{"cp20936", "gb2312", my_cs_approx},
{"cp20949", "euckr", my_cs_approx},
{"cp21866", "koi8u", my_cs_exact},
{"cp28591", "latin1", my_cs_approx},
{"cp28592", "latin2", my_cs_exact},
{"cp28597", "greek", my_cs_exact},
{"cp28598", "hebrew", my_cs_exact},
{"cp28599", "latin5", my_cs_exact},
{"cp28603", "latin7", my_cs_exact},
#ifdef UNCOMMENT_THIS_WHEN_WL_4579_IS_DONE
{"cp28605", "latin9", my_cs_exact},
#endif
{"cp38598", "hebrew", my_cs_exact},
{"cp51932", "ujis", my_cs_exact},
{"cp51936", "gb2312", my_cs_exact},
{"cp51949", "euckr", my_cs_exact},
{"cp51950", "big5", my_cs_exact},
#ifdef UNCOMMENT_THIS_WHEN_WL_WL_4024_IS_DONE
{"cp54936", "gb18030", my_cs_exact},
#endif
{"cp65001", "utf8", my_cs_exact},
#else /* not Windows */
{"646", "latin1", my_cs_approx}, /* Default on Solaris */
{"ANSI_X3.4-1968", "latin1", my_cs_approx},
{"ansi1251", "cp1251", my_cs_exact},
{"armscii8", "armscii8", my_cs_exact},
{"armscii-8", "armscii8", my_cs_exact},
{"ASCII", "latin1", my_cs_approx},
{"Big5", "big5", my_cs_exact},
{"cp1251", "cp1251", my_cs_exact},
{"cp1255", "hebrew", my_cs_approx},
{"CP866", "cp866", my_cs_exact},
{"eucCN", "gb2312", my_cs_exact},
{"euc-CN", "gb2312", my_cs_exact},
{"eucJP", "ujis", my_cs_exact},
{"euc-JP", "ujis", my_cs_exact},
{"eucKR", "euckr", my_cs_exact},
{"euc-KR", "euckr", my_cs_exact},
#ifdef UNCOMMENT_THIS_WHEN_WL_WL_4024_IS_DONE
{"gb18030", "gb18030", my_cs_exact},
#endif
{"gb2312", "gb2312", my_cs_exact},
{"gbk", "gbk", my_cs_exact},
{"georgianps", "geostd8", my_cs_exact},
{"georgian-ps", "geostd8", my_cs_exact},
{"IBM-1252", "cp1252", my_cs_exact},
{"iso88591", "latin1", my_cs_approx},
{"ISO_8859-1", "latin1", my_cs_approx},
{"ISO8859-1", "latin1", my_cs_approx},
{"ISO-8859-1", "latin1", my_cs_approx},
{"iso885913", "latin7", my_cs_exact},
{"ISO_8859-13", "latin7", my_cs_exact},
{"ISO8859-13", "latin7", my_cs_exact},
{"ISO-8859-13", "latin7", my_cs_exact},
#ifdef UNCOMMENT_THIS_WHEN_WL_4579_IS_DONE
{"iso885915", "latin9", my_cs_exact},
{"ISO_8859-15", "latin9", my_cs_exact},
{"ISO8859-15", "latin9", my_cs_exact},
{"ISO-8859-15", "latin9", my_cs_exact},
#endif
{"iso88592", "latin2", my_cs_exact},
{"ISO_8859-2", "latin2", my_cs_exact},
{"ISO8859-2", "latin2", my_cs_exact},
{"ISO-8859-2", "latin2", my_cs_exact},
{"iso88597", "greek", my_cs_exact},
{"ISO_8859-7", "greek", my_cs_exact},
{"ISO8859-7", "greek", my_cs_exact},
{"ISO-8859-7", "greek", my_cs_exact},
{"iso88598", "hebrew", my_cs_exact},
{"ISO_8859-8", "hebrew", my_cs_exact},
{"ISO8859-8", "hebrew", my_cs_exact},
{"ISO-8859-8", "hebrew", my_cs_exact},
{"iso88599", "latin5", my_cs_exact},
{"ISO_8859-9", "latin5", my_cs_exact},
{"ISO8859-9", "latin5", my_cs_exact},
{"ISO-8859-9", "latin5", my_cs_exact},
{"koi8r", "koi8r", my_cs_exact},
{"KOI8-R", "koi8r", my_cs_exact},
{"koi8u", "koi8u", my_cs_exact},
{"KOI8-U", "koi8u", my_cs_exact},
{"roman8", "hp8", my_cs_exact}, /* Default on HP UX */
{"Shift_JIS", "sjis", my_cs_exact},
{"SJIS", "sjis", my_cs_exact},
{"shiftjisx0213", "sjis", my_cs_exact},
{"tis620", "tis620", my_cs_exact},
{"tis-620", "tis620", my_cs_exact},
{"ujis", "ujis", my_cs_exact},
{"US-ASCII", "latin1", my_cs_approx},
{"utf8", "utf8", my_cs_exact},
{"utf-8", "utf8", my_cs_exact},
#endif
{NULL, NULL, 0}
};
/*
  Translate an OS character set name into a MySQL character set name.

  The charsets[] table is scanned in order and the first entry whose
  os_name equals csname (ignoring case) decides the result.

  @param csname  OS character set name to look up

  @return  the MySQL name of the matching entry when the match is exact or
           approximate; NULL when the entry is not usable as a client
           character set or when no entry matches.
*/
static const char*
my_os_charset_to_mysql_charset(const char* csname)
{
  const MY_CSET_OS_NAME *entry= charsets;

  while (entry->os_name)
  {
    if (strcasecmp(entry->os_name, csname) == 0)
    {
      /*
        An approximate match is accepted just like an exact one.
        Maybe we should print a warning eventually:
        character set correspondence is not exact.
      */
      if (entry->param == my_cs_exact || entry->param == my_cs_approx)
        return entry->my_name;
      return NULL;
    }
    entry++;
  }
  return NULL;
}
/*
  Return the MySQL name of the character set used by the OS environment.

  On Windows the console code page is queried, falling back to the ANSI
  code page when GetConsoleCP() returns 0, and looked up as "cp<number>".
  Elsewhere, when both setlocale() and nl_langinfo() are available, the
  codeset of the LC_CTYPE locale taken from the environment is looked up.

  NOTE: the non-Windows branch calls setlocale(LC_CTYPE, ""), which changes
  the LC_CTYPE locale of the calling process.

  @return  the mapped MySQL character set name, or
           MYSQL_DEFAULT_CHARSET_NAME when the OS character set could not
           be determined or has no usable mapping.
*/
const char* my_default_csname(void)
{
  const char* csname = NULL;
#ifdef _WIN32
  char cpbuf[64];
  int cp = GetConsoleCP();
  if (cp == 0)
    cp = GetACP();
  snprintf(cpbuf, sizeof(cpbuf), "cp%d", (int)cp);
  csname = my_os_charset_to_mysql_charset(cpbuf);
#elif defined(HAVE_SETLOCALE) && defined(HAVE_NL_LANGINFO)
  /* csname first holds the OS codeset name, then the mapped MySQL name */
  if (setlocale(LC_CTYPE, "") && (csname = nl_langinfo(CODESET)))
    csname = my_os_charset_to_mysql_charset(csname);
#endif
  return csname ? csname : MYSQL_DEFAULT_CHARSET_NAME;
}
\ No newline at end of file
......@@ -1927,250 +1927,13 @@ static MYSQL_METHODS client_methods=
};
typedef enum my_cs_match_type_enum
{
/* MySQL and OS charsets are fully compatible */
my_cs_exact,
/* MySQL charset is very close to OS charset */
my_cs_approx,
/*
MySQL knows this charset, but it is not supported as client character set.
*/
my_cs_unsupp
} my_cs_match_type;
typedef struct str2str_st
{
const char *os_name;
const char *my_name;
my_cs_match_type param;
} MY_CSET_OS_NAME;
const MY_CSET_OS_NAME charsets[]=
{
#ifdef __WIN__
{"cp437", "cp850", my_cs_approx},
{"cp850", "cp850", my_cs_exact},
{"cp852", "cp852", my_cs_exact},
{"cp858", "cp850", my_cs_approx},
{"cp866", "cp866", my_cs_exact},
{"cp874", "tis620", my_cs_approx},
{"cp932", "cp932", my_cs_exact},
{"cp936", "gbk", my_cs_approx},
{"cp949", "euckr", my_cs_approx},
{"cp950", "big5", my_cs_exact},
{"cp1200", "utf16le", my_cs_unsupp},
{"cp1201", "utf16", my_cs_unsupp},
{"cp1250", "cp1250", my_cs_exact},
{"cp1251", "cp1251", my_cs_exact},
{"cp1252", "latin1", my_cs_exact},
{"cp1253", "greek", my_cs_exact},
{"cp1254", "latin5", my_cs_exact},
{"cp1255", "hebrew", my_cs_approx},
{"cp1256", "cp1256", my_cs_exact},
{"cp1257", "cp1257", my_cs_exact},
{"cp10000", "macroman", my_cs_exact},
{"cp10001", "sjis", my_cs_approx},
{"cp10002", "big5", my_cs_approx},
{"cp10008", "gb2312", my_cs_approx},
{"cp10021", "tis620", my_cs_approx},
{"cp10029", "macce", my_cs_exact},
{"cp12001", "utf32", my_cs_unsupp},
{"cp20107", "swe7", my_cs_exact},
{"cp20127", "latin1", my_cs_approx},
{"cp20866", "koi8r", my_cs_exact},
{"cp20932", "ujis", my_cs_exact},
{"cp20936", "gb2312", my_cs_approx},
{"cp20949", "euckr", my_cs_approx},
{"cp21866", "koi8u", my_cs_exact},
{"cp28591", "latin1", my_cs_approx},
{"cp28592", "latin2", my_cs_exact},
{"cp28597", "greek", my_cs_exact},
{"cp28598", "hebrew", my_cs_exact},
{"cp28599", "latin5", my_cs_exact},
{"cp28603", "latin7", my_cs_exact},
#ifdef UNCOMMENT_THIS_WHEN_WL_4579_IS_DONE
{"cp28605", "latin9", my_cs_exact},
#endif
{"cp38598", "hebrew", my_cs_exact},
{"cp51932", "ujis", my_cs_exact},
{"cp51936", "gb2312", my_cs_exact},
{"cp51949", "euckr", my_cs_exact},
{"cp51950", "big5", my_cs_exact},
#ifdef UNCOMMENT_THIS_WHEN_WL_WL_4024_IS_DONE
{"cp54936", "gb18030", my_cs_exact},
#endif
{"cp65001", "utf8", my_cs_exact},
#else /* not Windows */
{"646", "latin1", my_cs_approx}, /* Default on Solaris */
{"ANSI_X3.4-1968", "latin1", my_cs_approx},
{"ansi1251", "cp1251", my_cs_exact},
{"armscii8", "armscii8", my_cs_exact},
{"armscii-8", "armscii8", my_cs_exact},
{"ASCII", "latin1", my_cs_approx},
{"Big5", "big5", my_cs_exact},
{"cp1251", "cp1251", my_cs_exact},
{"cp1255", "hebrew", my_cs_approx},
{"CP866", "cp866", my_cs_exact},
{"eucCN", "gb2312", my_cs_exact},
{"euc-CN", "gb2312", my_cs_exact},
{"eucJP", "ujis", my_cs_exact},
{"euc-JP", "ujis", my_cs_exact},
{"eucKR", "euckr", my_cs_exact},
{"euc-KR", "euckr", my_cs_exact},
#ifdef UNCOMMENT_THIS_WHEN_WL_WL_4024_IS_DONE
{"gb18030", "gb18030", my_cs_exact},
#endif
{"gb2312", "gb2312", my_cs_exact},
{"gbk", "gbk", my_cs_exact},
{"georgianps", "geostd8", my_cs_exact},
{"georgian-ps", "geostd8", my_cs_exact},
{"IBM-1252", "cp1252", my_cs_exact},
{"iso88591", "latin1", my_cs_approx},
{"ISO_8859-1", "latin1", my_cs_approx},
{"ISO8859-1", "latin1", my_cs_approx},
{"ISO-8859-1", "latin1", my_cs_approx},
{"iso885913", "latin7", my_cs_exact},
{"ISO_8859-13", "latin7", my_cs_exact},
{"ISO8859-13", "latin7", my_cs_exact},
{"ISO-8859-13", "latin7", my_cs_exact},
#ifdef UNCOMMENT_THIS_WHEN_WL_4579_IS_DONE
{"iso885915", "latin9", my_cs_exact},
{"ISO_8859-15", "latin9", my_cs_exact},
{"ISO8859-15", "latin9", my_cs_exact},
{"ISO-8859-15", "latin9", my_cs_exact},
#endif
{"iso88592", "latin2", my_cs_exact},
{"ISO_8859-2", "latin2", my_cs_exact},
{"ISO8859-2", "latin2", my_cs_exact},
{"ISO-8859-2", "latin2", my_cs_exact},
{"iso88597", "greek", my_cs_exact},
{"ISO_8859-7", "greek", my_cs_exact},
{"ISO8859-7", "greek", my_cs_exact},
{"ISO-8859-7", "greek", my_cs_exact},
{"iso88598", "hebrew", my_cs_exact},
{"ISO_8859-8", "hebrew", my_cs_exact},
{"ISO8859-8", "hebrew", my_cs_exact},
{"ISO-8859-8", "hebrew", my_cs_exact},
{"iso88599", "latin5", my_cs_exact},
{"ISO_8859-9", "latin5", my_cs_exact},
{"ISO8859-9", "latin5", my_cs_exact},
{"ISO-8859-9", "latin5", my_cs_exact},
{"koi8r", "koi8r", my_cs_exact},
{"KOI8-R", "koi8r", my_cs_exact},
{"koi8u", "koi8u", my_cs_exact},
{"KOI8-U", "koi8u", my_cs_exact},
{"roman8", "hp8", my_cs_exact}, /* Default on HP UX */
{"Shift_JIS", "sjis", my_cs_exact},
{"SJIS", "sjis", my_cs_exact},
{"shiftjisx0213", "sjis", my_cs_exact},
{"tis620", "tis620", my_cs_exact},
{"tis-620", "tis620", my_cs_exact},
{"ujis", "ujis", my_cs_exact},
{"US-ASCII", "latin1", my_cs_approx},
{"utf8", "utf8", my_cs_exact},
{"utf-8", "utf8", my_cs_exact},
#endif
{NULL, NULL, 0}
};
static const char *
my_os_charset_to_mysql_charset(const char *csname)
{
const MY_CSET_OS_NAME *csp;
for (csp= charsets; csp->os_name; csp++)
{
if (!my_strcasecmp(&my_charset_latin1, csp->os_name, csname))
{
switch (csp->param)
{
case my_cs_exact:
return csp->my_name;
case my_cs_approx:
/*
Maybe we should print a warning eventually:
character set correspondence is not exact.
*/
return csp->my_name;
default:
my_printf_error(ER_UNKNOWN_ERROR,
"OS character set '%s'"
" is not supported by MySQL client",
MYF(0), csp->my_name);
goto def;
}
}
}
my_printf_error(ER_UNKNOWN_ERROR,
"Unknown OS character set '%s'.",
MYF(0), csname);
def:
csname= MYSQL_DEFAULT_CHARSET_NAME;
my_printf_error(ER_UNKNOWN_ERROR,
"Switching to the default character set '%s'.",
MYF(0), csname);
return csname;
}
#ifndef __WIN__
#include <stdlib.h> /* for getenv() */
#ifdef HAVE_LANGINFO_H
#include <langinfo.h>
#endif
#ifdef HAVE_LOCALE_H
#include <locale.h>
#endif
#endif /* __WIN__ */
#include <my_sys.h>
static int
mysql_autodetect_character_set(MYSQL *mysql)
{
const char *csname= MYSQL_DEFAULT_CHARSET_NAME;
#ifdef __WIN__
char cpbuf[64];
{
UINT cp= GetConsoleCP();
if (cp == 0)
cp= GetACP();
my_snprintf(cpbuf, sizeof(cpbuf), "cp%d", (int)cp);
csname= my_os_charset_to_mysql_charset(cpbuf);
}
#elif defined(HAVE_SETLOCALE) && defined(HAVE_NL_LANGINFO)
{
if (setlocale(LC_CTYPE, "") && (csname= nl_langinfo(CODESET)))
csname= my_os_charset_to_mysql_charset(csname);
}
#endif
if (mysql->options.charset_name)
my_free(mysql->options.charset_name);
if (!(mysql->options.charset_name= my_strdup(csname, MYF(MY_WME))))
if (!(mysql->options.charset_name= my_strdup(my_default_csname(),MYF(MY_WME))))
return 1;
return 0;
}
......@@ -2211,16 +1974,13 @@ C_MODE_START
int mysql_init_character_set(MYSQL *mysql)
{
/* Set character set */
if (!mysql->options.charset_name)
if (!mysql->options.charset_name ||
!strcmp(mysql->options.charset_name,
MYSQL_AUTODETECT_CHARSET_NAME))
{
if (!(mysql->options.charset_name=
my_strdup(MYSQL_DEFAULT_CHARSET_NAME,MYF(MY_WME))))
if (mysql_autodetect_character_set(mysql))
return 1;
}
else if (!strcmp(mysql->options.charset_name,
MYSQL_AUTODETECT_CHARSET_NAME) &&
mysql_autodetect_character_set(mysql))
return 1;
mysql_set_character_set_with_default_collation(mysql);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment