Commit db3b3c17 authored by unknown's avatar unknown

Associate a charset directly with its number in the Index file, and

propagate those changes through the code.  This is so that there can
be holes in the list of charsets without breaking old tables.


configure.in:
  - changed pattern for getting number from charsets Index file
mysys/charset.c:
  - changed from using a TYPELIB to a CS_ID struct, so both the name
    and the number of a charset are stored in available_charsets
sql/share/charsets/Index:
  - made the number a real part of the Index file, not just a comment
sql/share/charsets/README:
  - order is no longer significant, but each charset must be paired
    with its number
parent 49b83f38
...@@ -1578,15 +1578,6 @@ do ...@@ -1578,15 +1578,6 @@ do
fi fi
done done
default_charset_has_source=0
for cs in $COMPILED_CHARSETS
do
if test $cs = $default_charset
then
default_charset_has_source=1
fi
done
CHARSET_SRCS="" CHARSET_SRCS=""
CHARSETS_NEED_SOURCE="" CHARSETS_NEED_SOURCE=""
CHARSET_DECLARATIONS="" CHARSET_DECLARATIONS=""
...@@ -1600,8 +1591,10 @@ index_file="$srcdir/sql/share/charsets/Index" ...@@ -1600,8 +1591,10 @@ index_file="$srcdir/sql/share/charsets/Index"
for c in $CHARSETS for c in $CHARSETS
do do
# get the charset number from $index_file # get the charset number from $index_file
subpat='^'"${c}"'[[\t ]]*#' changequote(,)dnl
number=`$AWK 'sub("'"$subpat"'", "") { print }' $index_file` subpat='^'"${c}"'[ ][ ]*\([0-9][0-9]*\)[^0-9]*$'
number=`sed -e "/$subpat/!d" -e 's//\1/' $index_file`
changequote([,])dnl
# some sanity checking.... # some sanity checking....
if test X"$number" = X if test X"$number" = X
then then
......
...@@ -21,9 +21,14 @@ ...@@ -21,9 +21,14 @@
#include <m_string.h> #include <m_string.h>
#include <my_dir.h> #include <my_dir.h>
/*
  One charset entry: a charset name paired directly with its number.
  Entries are filled in from the two words read per charset from the
  Index file, so the number no longer depends on the entry's position.
*/
typedef struct cs_id_st {
char *name;    /* charset name as read from the Index file */
uint number;   /* number paired with that name in the Index file */
} CS_ID;
const char *charsets_dir = NULL; const char *charsets_dir = NULL;
static DYNAMIC_ARRAY cs_info_table; static DYNAMIC_ARRAY cs_info_table;
static TYPELIB available_charsets; static CS_ID *available_charsets;
static int charset_initialized=0; static int charset_initialized=0;
#define MAX_LINE 1024 #define MAX_LINE 1024
...@@ -46,6 +51,24 @@ uint compiled_charset_number(const char *name); ...@@ -46,6 +51,24 @@ uint compiled_charset_number(const char *name);
const char *compiled_charset_name(uint charset_number); const char *compiled_charset_name(uint charset_number);
/*
  Look up the number paired with a charset name.

  cs    Array of CS_ID entries to search.  May be NULL.
        NOTE(review): the scan assumes the array ends with an entry
        whose name is NULL -- confirm that the code building the array
        writes such a terminator.
  name  Charset name to look for; compared with strcmp(), so the match
        is exact and case-sensitive.  May be NULL.

  Returns the number of the first entry whose name equals `name`, or 0
  when there is no such entry (or when either argument is NULL).
*/
static uint num_from_csname(CS_ID *cs, const char *name)
{
  CS_ID *c;

  if (!cs || !name)
    return 0;

  /*
    Stop on the terminating entry, not on the cursor itself: a pointer
    that is only ever incremented never becomes NULL, so testing "c"
    would walk past the end of the array whenever the name is absent.
  */
  for (c = cs; c->name; ++c)
  {
    if (!strcmp(c->name, name))
      return c->number;
  }
  return 0;   /* this mimics find_type() */
}
/*
  Look up the charset name paired with a charset number.

  cs      Array of CS_ID entries to search.  May be NULL.
          NOTE(review): the scan assumes the array ends with an entry
          whose name is NULL -- confirm that the code building the
          array writes such a terminator.
  number  Charset number to look for.

  Returns the name of the first entry whose number equals `number`, or
  the string "?" when there is no such entry (or when cs is NULL).
*/
static char *name_from_csnum(CS_ID *cs, uint number)
{
  CS_ID *c;

  if (!cs)
    return "?";

  /*
    Stop on the terminating entry, not on the cursor itself: a pointer
    that is only ever incremented never becomes NULL, so testing "c"
    would walk past the end of the array whenever the number is absent.
  */
  for (c = cs; c->name; ++c)
  {
    if (c->number == number)
      return c->name;
  }
  return "?";   /* this mimics find_type() */
}
static my_bool get_word(struct simpleconfig_buf_st *fb, char *buf) static my_bool get_word(struct simpleconfig_buf_st *fb, char *buf)
{ {
char *endptr=fb->p; char *endptr=fb->p;
...@@ -92,12 +115,12 @@ static char *get_charsets_dir(char *buf) ...@@ -92,12 +115,12 @@ static char *get_charsets_dir(char *buf)
} }
static my_bool read_charset_index(TYPELIB *charsets, myf myflags) static my_bool read_charset_index(CS_ID **charsets, myf myflags)
{ {
struct simpleconfig_buf_st fb; struct simpleconfig_buf_st fb;
char buf[MAX_LINE]; char buf[MAX_LINE], num_buf[MAX_LINE];
DYNAMIC_ARRAY cs; DYNAMIC_ARRAY cs;
my_string s; CS_ID *csid;
strmov(get_charsets_dir(buf), "Index"); strmov(get_charsets_dir(buf), "Index");
...@@ -106,36 +129,42 @@ static my_bool read_charset_index(TYPELIB *charsets, myf myflags) ...@@ -106,36 +129,42 @@ static my_bool read_charset_index(TYPELIB *charsets, myf myflags)
fb.buf[0] = '\0'; fb.buf[0] = '\0';
fb.p = fb.buf; fb.p = fb.buf;
if (init_dynamic_array(&cs, sizeof(my_string), 32, 32)) if (init_dynamic_array(&cs, sizeof(CS_ID *), 32, 32))
return TRUE; return TRUE;
while (!get_word(&fb, buf)) while (!get_word(&fb, buf) && !get_word(&fb, num_buf))
{ {
uint csnum;
uint length; uint length;
if (!(s= (char*) my_once_alloc(length= (uint) strlen(buf)+1, myflags)))
if (!(csnum = atoi(num_buf)))
{ {
/* corrupt Index file */
my_fclose(fb.f,myflags); my_fclose(fb.f,myflags);
return TRUE; return TRUE;
} }
memcpy(s,buf,length);
insert_dynamic(&cs, (gptr) &s); if (!(csid = (CS_ID*) my_once_alloc(sizeof(CS_ID), myflags)) ||
!(csid->name=
(char*) my_once_alloc(length= (uint) strlen(buf)+1, myflags)))
{
my_fclose(fb.f,myflags);
return TRUE;
}
memcpy(csid->name,buf,length);
csid->number = csnum;
insert_dynamic(&cs, (gptr) &csid);
} }
my_fclose(fb.f,myflags); my_fclose(fb.f,myflags);
/* I seriously doubt this is the best way to initialize this
* TYPELIB from the Index file. But it's the best way I could
* come up with right now. */
charsets->count = cs.elements; if (!(*charsets =
charsets->name = ""; (CS_ID *) my_once_alloc((cs.elements + 1) * sizeof(CS_ID *), myflags)))
if (!(charsets->type_names =
(const char **) my_once_alloc((cs.elements + 1) * sizeof(const char *),
myflags)))
return TRUE; return TRUE;
/* unwarranted chumminess with dynamic_array implementation? */ /* unwarranted chumminess with dynamic_array implementation? */
memcpy((char*) charsets->type_names, cs.buffer, memcpy((byte *) *charsets, cs.buffer, cs.elements * sizeof(CS_ID *));
cs.elements * sizeof(my_string *)); (*charsets)[cs.elements] = NULL;
charsets->type_names[cs.elements] = NullS;
delete_dynamic(&cs); delete_dynamic(&cs);
return FALSE; return FALSE;
...@@ -164,7 +193,7 @@ static my_bool init_available_charsets(myf myflags) ...@@ -164,7 +193,7 @@ static my_bool init_available_charsets(myf myflags)
charset_initialized=1; charset_initialized=1;
pthread_mutex_unlock(&THR_LOCK_charset); pthread_mutex_unlock(&THR_LOCK_charset);
} }
return error || available_charsets.count == 0; return error || !available_charsets[0];
} }
...@@ -193,7 +222,7 @@ static my_bool fill_array(uchar *array, int sz, struct simpleconfig_buf_st *fb) ...@@ -193,7 +222,7 @@ static my_bool fill_array(uchar *array, int sz, struct simpleconfig_buf_st *fb)
static void get_charset_conf_name(uint cs_number, char *buf) static void get_charset_conf_name(uint cs_number, char *buf)
{ {
strxmov(get_charsets_dir(buf), strxmov(get_charsets_dir(buf),
get_type(&available_charsets, cs_number - 1), ".conf", NullS); name_from_csnum(&available_charsets, cs_number), ".conf", NullS);
} }
...@@ -237,7 +266,7 @@ uint get_charset_number(const char *charset_name) ...@@ -237,7 +266,7 @@ uint get_charset_number(const char *charset_name)
if (error) if (error)
return compiled_charset_number(charset_name); return compiled_charset_number(charset_name);
else else
return find_type((char*)charset_name, &available_charsets, 1); return num_from_csname((char*)charset_name, &available_charsets, 1);
} }
const char *get_charset_name(uint charset_number) const char *get_charset_name(uint charset_number)
...@@ -247,7 +276,7 @@ const char *get_charset_name(uint charset_number) ...@@ -247,7 +276,7 @@ const char *get_charset_name(uint charset_number)
if (error) if (error)
return compiled_charset_name(charset_number); return compiled_charset_name(charset_number);
else else
return get_type(&available_charsets, charset_number - 1); return name_from_csnum(&available_charsets, charset_number);
} }
...@@ -452,29 +481,27 @@ char * list_charsets(myf want_flags) ...@@ -452,29 +481,27 @@ char * list_charsets(myf want_flags)
if (want_flags & MY_CONFIG_SETS) if (want_flags & MY_CONFIG_SETS)
{ {
uint i; CS_ID *c;
const char *cs_name;
char buf[FN_REFLEN]; char buf[FN_REFLEN];
MY_STAT stat; MY_STAT stat;
for (i = 0; i < available_charsets.count; i++) for (c = available_charsets; *c; ++c)
{ {
cs_name = get_type(&available_charsets, i); if (charset_in_string(c->name, &s))
if (charset_in_string(cs_name, &s))
continue; continue;
get_charset_conf_name(i + 1, buf); get_charset_conf_name(c->number, buf);
if (!my_stat(buf, &stat, MYF(0))) if (!my_stat(buf, &stat, MYF(0)))
continue; /* conf file doesn't exist */ continue; /* conf file doesn't exist */
dynstr_append(&s, cs_name); dynstr_append(&s, c->name);
dynstr_append(&s, " "); dynstr_append(&s, " ");
} }
} }
if (want_flags & MY_INDEX_SETS) if (want_flags & MY_INDEX_SETS)
{ {
uint i; CS_ID *c;
for (i = 0; i < available_charsets.count; i++) for (c = available_charsets; *c; ++c)
charset_append(&s, get_type(&available_charsets, i)); charset_append(&s, c->name);
} }
if (want_flags & MY_LOADED_SETS) if (want_flags & MY_LOADED_SETS)
......
...@@ -2,36 +2,33 @@ ...@@ -2,36 +2,33 @@
# #
# This file lists all of the available character sets. # This file lists all of the available character sets.
# THE ORDER IN WHICH CHARACTER SETS ARE LISTED IS IMPORTANT. See the
# README file in this directory for details.
big5 1
big5 # 1 czech 2
czech # 2 dec8 3
dec8 # 3 dos 4
dos # 4 german1 5
german1 # 5 hp8 6
hp8 # 6 koi8_ru 7
koi8_ru # 7 latin1 8
latin1 # 8 latin2 9
latin2 # 9 swe7 10
swe7 # 10 usa7 11
usa7 # 11 ujis 12
ujis # 12 sjis 13
sjis # 13 cp1251 14
cp1251 # 14 danish 15
danish # 15 hebrew 16
hebrew # 16 win1251 17
win1251 # 17 tis620 18
tis620 # 18 euc_kr 19
euc_kr # 19 estonia 20
estonia # 20 hungarian 21
hungarian # 21 koi8_ukr 22
koi8_ukr # 22 win1251ukr 23
win1251ukr # 23 gb2312 24
gb2312 # 24 greek 25
greek # 25 win1250 26
win1250 # 26 croat 27
croat # 27 gbk 28
gbk # 28 cp1257 29
cp1257 # 29
...@@ -9,10 +9,9 @@ different character sets. It contains: ...@@ -9,10 +9,9 @@ different character sets. It contains:
Index Index
The Index file lists all of the available charset configurations. The Index file lists all of the available charset configurations.
THE ORDER OF THE CHARACTER SETS IN THIS FILE IS SIGNIFICANT. Each charset is paired with a number. The number is stored
The first character set is number 1, the second is number 2, etc. The IN THE DATABASE TABLE FILES and must not be changed. Always
number is stored IN THE DATABASE TABLE FILES and must not be changed. add new character sets to the end of the list, so that the
Always add new character sets to the end of the list, so that the
numbers of the other character sets will not be changed. numbers of the other character sets will not be changed.
Compiled in or configuration file? Compiled in or configuration file?
...@@ -39,5 +38,3 @@ Syntax of configuration files ...@@ -39,5 +38,3 @@ Syntax of configuration files
number in hexadecimal format. The ctype array takes up the first number in hexadecimal format. The ctype array takes up the first
257 words; the to_lower, to_upper and sort_order arrays take up 256 257 words; the to_lower, to_upper and sort_order arrays take up 256
words each after that. words each after that.
The Index file is simply a list of the available character sets.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment