/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ #include "mysys_priv.h" #include "mysys_err.h" #include <m_ctype.h> #include <m_string.h> #include <my_dir.h> typedef struct cs_id_st { char *name; uint number; } CS_ID; const char *charsets_dir = NULL; static DYNAMIC_ARRAY cs_info_table; static CS_ID **available_charsets; static int charset_initialized=0; #define MAX_LINE 1024 #define CTYPE_TABLE_SIZE 257 #define TO_LOWER_TABLE_SIZE 256 #define TO_UPPER_TABLE_SIZE 256 #define SORT_ORDER_TABLE_SIZE 256 struct simpleconfig_buf_st { FILE *f; char buf[MAX_LINE]; char *p; }; /* Defined in strings/ctype.c */ CHARSET_INFO *find_compiled_charset(uint cs_number); uint compiled_charset_number(const char *name); const char *compiled_charset_name(uint charset_number); static uint num_from_csname(CS_ID **cs, const char *name) { CS_ID **c; for (c = cs; *c; ++c) if (!strcmp((*c)->name, name)) return (*c)->number; return 0; /* this mimics find_type() */ } static char *name_from_csnum(CS_ID **cs, uint number) { CS_ID **c; for (c = cs; *c; ++c) if ((*c)->number == number) return (*c)->name; return "?"; /* this mimics find_type() */ } static my_bool get_word(struct simpleconfig_buf_st *fb, char *buf) { char *endptr=fb->p; for (;;) { while (isspace(*endptr)) ++endptr; if (*endptr && *endptr != '#') /* Not comment */ break; /* Found something */ if ((fgets(fb->buf, sizeof(fb->buf), fb->f)) == NULL) return TRUE; /* end of file */ endptr = fb->buf; } while (!isspace(*endptr)) *buf++= *endptr++; *buf=0; fb->p = endptr; return FALSE; } static char *get_charsets_dir(char *buf) { const char *sharedir = SHAREDIR; DBUG_ENTER("get_charsets_dir"); if (charsets_dir != NULL) strnmov(buf, charsets_dir, FN_REFLEN); else { if (test_if_hard_path(sharedir) || is_prefix(sharedir, DEFAULT_CHARSET_HOME)) strxmov(buf, sharedir, "/", CHARSET_DIR, NullS); else strxmov(buf, DEFAULT_CHARSET_HOME, "/", sharedir, "/", CHARSET_DIR, NullS); } convert_dirname(buf); DBUG_PRINT("info",("charsets dir='%s'", buf)); DBUG_RETURN(strend(buf)); } static my_bool read_charset_index(CS_ID ***charsets, myf myflags) { struct simpleconfig_buf_st fb; char buf[MAX_LINE], num_buf[MAX_LINE]; DYNAMIC_ARRAY cs; CS_ID *csid; strmov(get_charsets_dir(buf), "Index"); if ((fb.f = my_fopen(buf, O_RDONLY, myflags)) == NULL) return TRUE; fb.buf[0] = '\0'; fb.p = fb.buf; if (init_dynamic_array(&cs, sizeof(CS_ID *), 32, 32)) return TRUE; while (!get_word(&fb, buf) && !get_word(&fb, num_buf)) { uint csnum; uint length; if (!(csnum = atoi(num_buf))) { /* corrupt Index file */ my_fclose(fb.f,myflags); return TRUE; } if (!(csid = (CS_ID*) my_once_alloc(sizeof(CS_ID), myflags)) || !(csid->name= (char*) my_once_alloc(length= (uint) strlen(buf)+1, myflags))) { my_fclose(fb.f,myflags); return TRUE; } memcpy(csid->name,buf,length); csid->number = csnum; insert_dynamic(&cs, (gptr) &csid); } my_fclose(fb.f,myflags); if (!(*charsets = (CS_ID **) my_once_alloc((cs.elements + 1) * sizeof(CS_ID *), myflags))) return TRUE; /* unwarranted chumminess with dynamic_array implementation? */ memcpy((byte *) *charsets, cs.buffer, cs.elements * sizeof(CS_ID *)); (*charsets)[cs.elements] = NULL; delete_dynamic(&cs); return FALSE; } static my_bool init_available_charsets(myf myflags) { my_bool error=0; /* We have to use charset_initialized to not lock on THR_LOCK_charset inside get_internal_charset... */ if (!charset_initialized) { /* To make things thread safe we are not allowing other threads to interfere while we may changing the cs_info_table */ pthread_mutex_lock(&THR_LOCK_charset); if (!cs_info_table.buffer) /* If not initialized */ { init_dynamic_array(&cs_info_table, sizeof(CHARSET_INFO*), 16, 8); error = read_charset_index(&available_charsets, myflags); } charset_initialized=1; pthread_mutex_unlock(&THR_LOCK_charset); } if(!available_charsets || !available_charsets[0]) error = TRUE; return error; } void free_charsets(void) { delete_dynamic(&cs_info_table); } static my_bool fill_array(uchar *array, int sz, struct simpleconfig_buf_st *fb) { char buf[MAX_LINE]; while (sz--) { if (get_word(fb, buf)) { DBUG_PRINT("error",("get_word failed, expecting %d more words", sz + 1)); return 1; } *array++ = (uchar) strtol(buf, NULL, 16); } return 0; } static void get_charset_conf_name(uint cs_number, char *buf) { strxmov(get_charsets_dir(buf), name_from_csnum(available_charsets, cs_number), ".conf", NullS); } static my_bool read_charset_file(uint cs_number, CHARSET_INFO *set, myf myflags) { struct simpleconfig_buf_st fb; char buf[FN_REFLEN]; my_bool result; DBUG_ENTER("read_charset_file"); DBUG_PRINT("enter",("cs_number: %d", cs_number)); if (cs_number <= 0) DBUG_RETURN(TRUE); get_charset_conf_name(cs_number, buf); DBUG_PRINT("info",("file name: %s", buf)); if ((fb.f = my_fopen(buf, O_RDONLY, myflags)) == NULL) DBUG_RETURN(TRUE); fb.buf[0] = '\0'; /* Init for get_word */ fb.p = fb.buf; result=FALSE; if (fill_array(set->ctype, CTYPE_TABLE_SIZE, &fb) || fill_array(set->to_lower, TO_LOWER_TABLE_SIZE, &fb) || fill_array(set->to_upper, TO_UPPER_TABLE_SIZE, &fb) || fill_array(set->sort_order, SORT_ORDER_TABLE_SIZE, &fb)) result=TRUE; my_fclose(fb.f, MYF(0)); DBUG_RETURN(result); } uint get_charset_number(const char *charset_name) { my_bool error; error = init_available_charsets(MYF(0)); /* If it isn't initialized */ if (error) return compiled_charset_number(charset_name); else return num_from_csname(available_charsets, charset_name); } const char *get_charset_name(uint charset_number) { my_bool error; error = init_available_charsets(MYF(0)); /* If it isn't initialized */ if (error) return compiled_charset_name(charset_number); else return name_from_csnum(available_charsets, charset_number); } static CHARSET_INFO *find_charset(CHARSET_INFO **table, uint cs_number, size_t tablesz) { uint i; for (i = 0; i < tablesz; ++i) if (table[i]->number == cs_number) return table[i]; return NULL; } static CHARSET_INFO *find_charset_by_name(CHARSET_INFO **table, const char *name, size_t tablesz) { uint i; for (i = 0; i < tablesz; ++i) if (!strcmp(table[i]->name,name)) return table[i]; return NULL; } static CHARSET_INFO *add_charset(uint cs_number, const char *cs_name) { CHARSET_INFO tmp_cs,*cs; uchar tmp_ctype[CTYPE_TABLE_SIZE]; uchar tmp_to_lower[TO_LOWER_TABLE_SIZE]; uchar tmp_to_upper[TO_UPPER_TABLE_SIZE]; uchar tmp_sort_order[SORT_ORDER_TABLE_SIZE]; /* Don't allocate memory if we are not sure we can find the char set */ cs= &tmp_cs; bzero((char*) cs, sizeof(*cs)); cs->ctype=tmp_ctype; cs->to_lower=tmp_to_lower; cs->to_upper=tmp_to_upper; cs->sort_order=tmp_sort_order; if (read_charset_file(cs_number, cs, MYF(MY_WME))) return NULL; cs = (CHARSET_INFO*) my_once_alloc(sizeof(CHARSET_INFO), MYF(MY_WME)); *cs=tmp_cs; cs->name = (char *) my_once_alloc((uint) strlen(cs_name)+1, MYF(MY_WME)); cs->ctype = (uchar*) my_once_alloc(CTYPE_TABLE_SIZE, MYF(MY_WME)); cs->to_lower = (uchar*) my_once_alloc(TO_LOWER_TABLE_SIZE, MYF(MY_WME)); cs->to_upper = (uchar*) my_once_alloc(TO_UPPER_TABLE_SIZE, MYF(MY_WME)); cs->sort_order=(uchar*) my_once_alloc(SORT_ORDER_TABLE_SIZE, MYF(MY_WME)); cs->number = cs_number; memcpy((char*) cs->name, (char*) cs_name, strlen(cs_name) + 1); memcpy((char*) cs->ctype, (char*) tmp_ctype, sizeof(tmp_ctype)); memcpy((char*) cs->to_lower, (char*) tmp_to_lower, sizeof(tmp_to_lower)); memcpy((char*) cs->to_upper, (char*) tmp_to_upper, sizeof(tmp_to_upper)); memcpy((char*) cs->sort_order, (char*) tmp_sort_order, sizeof(tmp_sort_order)); insert_dynamic(&cs_info_table, (gptr) &cs); return cs; } static CHARSET_INFO *get_internal_charset(uint cs_number) { CHARSET_INFO *cs; /* To make things thread safe we are not allowing other threads to interfere while we may changing the cs_info_table */ pthread_mutex_lock(&THR_LOCK_charset); if (!(cs = find_charset((CHARSET_INFO**) cs_info_table.buffer, cs_number, cs_info_table.elements))) if (!(cs = find_compiled_charset(cs_number))) cs=add_charset(cs_number, get_charset_name(cs_number)); pthread_mutex_unlock(&THR_LOCK_charset); return cs; } static CHARSET_INFO *get_internal_charset_by_name(const char *name) { CHARSET_INFO *cs; /* To make things thread safe we are not allowing other threads to interfere while we may changing the cs_info_table */ pthread_mutex_lock(&THR_LOCK_charset); if (!(cs = find_charset_by_name((CHARSET_INFO**) cs_info_table.buffer, name, cs_info_table.elements))) if (!(cs = find_compiled_charset_by_name(name))) cs=add_charset(get_charset_number(name), name); pthread_mutex_unlock(&THR_LOCK_charset); return cs; } CHARSET_INFO *get_charset(uint cs_number, myf flags) { CHARSET_INFO *cs; (void) init_available_charsets(MYF(0)); /* If it isn't initialized */ cs=get_internal_charset(cs_number); if (!cs && flags & MY_WME) { char index_file[FN_REFLEN], cs_string[23]; strmov(get_charsets_dir(index_file), "Index"); cs_string[0]='#'; int10_to_str(cs_number, cs_string+1, 10); my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_string, index_file); } return cs; } my_bool set_default_charset(uint cs, myf flags) { CHARSET_INFO *new; DBUG_ENTER("set_default_charset"); DBUG_PRINT("enter",("character set: %d",(int) cs)); new = get_charset(cs, flags); if (!new) { DBUG_PRINT("error",("Couldn't set default character set")); DBUG_RETURN(TRUE); /* error */ } default_charset_info = new; DBUG_RETURN(FALSE); } CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags) { CHARSET_INFO *cs; (void) init_available_charsets(MYF(0)); /* If it isn't initialized */ cs=get_internal_charset_by_name(cs_name); if (!cs && (flags & MY_WME)) { char index_file[FN_REFLEN]; strmov(get_charsets_dir(index_file), "Index"); my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_name, index_file); } return cs; } my_bool set_default_charset_by_name(const char *cs_name, myf flags) { CHARSET_INFO *new; DBUG_ENTER("set_default_charset_by_name"); DBUG_PRINT("enter",("character set: %s", cs_name)); new = get_charset_by_name(cs_name, flags); if (!new) { DBUG_PRINT("error",("Couldn't set default character set")); DBUG_RETURN(TRUE); /* error */ } default_charset_info = new; DBUG_RETURN(FALSE); } /* Only append name if it doesn't exist from before */ static my_bool charset_in_string(const char *name, DYNAMIC_STRING *s) { uint length= (uint) strlen(name); const char *pos; for (pos=s->str ; (pos=strstr(pos,name)) ; pos++) { if (! pos[length] || pos[length] == ' ') return TRUE; /* Already existed */ } return FALSE; } static void charset_append(DYNAMIC_STRING *s, const char *name) { if (!charset_in_string(name, s)) { dynstr_append(s, name); dynstr_append(s, " "); } } /* Returns a dynamically-allocated string listing the character sets requested. The caller is responsible for freeing the memory. */ char * list_charsets(myf want_flags) { DYNAMIC_STRING s; char *p; (void)init_available_charsets(MYF(0)); init_dynamic_string(&s, NullS, 256, 1024); if (want_flags & MY_COMPILED_SETS) { CHARSET_INFO *cs; for (cs = compiled_charsets; cs->number > 0; cs++) { dynstr_append(&s, cs->name); dynstr_append(&s, " "); } } if (want_flags & MY_CONFIG_SETS) { CS_ID **c; char buf[FN_REFLEN]; MY_STAT stat; if((c=available_charsets)) for (; *c; ++c) { if (charset_in_string((*c)->name, &s)) continue; get_charset_conf_name((*c)->number, buf); if (!my_stat(buf, &stat, MYF(0))) continue; /* conf file doesn't exist */ dynstr_append(&s, (*c)->name); dynstr_append(&s, " "); } } if (want_flags & MY_INDEX_SETS) { CS_ID **c; for (c = available_charsets; *c; ++c) charset_append(&s, (*c)->name); } if (want_flags & MY_LOADED_SETS) { uint i; for (i = 0; i < cs_info_table.elements; i++) charset_append(&s, dynamic_element(&cs_info_table, i, CHARSET_INFO *)->name); } s.str[s.length - 1] = '\0'; /* chop trailing space */ p = my_strdup(s.str, MYF(MY_WME)); dynstr_free(&s); return p; } /**************************************************************************** * Code for debugging. ****************************************************************************/ static void _print_array(uint8 *data, uint size) { uint i; for (i = 0; i < size; ++i) { if (i == 0 || i % 16 == size % 16) printf(" "); printf(" %02x", data[i]); if ((i+1) % 16 == size % 16) printf("\n"); } } /* _print_csinfo is called from test_charset.c */ void _print_csinfo(CHARSET_INFO *cs) { printf("%s #%d\n", cs->name, cs->number); printf("ctype:\n"); _print_array(cs->ctype, 257); printf("to_lower:\n"); _print_array(cs->to_lower, 256); printf("to_upper:\n"); _print_array(cs->to_upper, 256); printf("sort_order:\n"); _print_array(cs->sort_order, 256); printf("collate: %3s (%d, %p, %p, %p, %p, %p)\n", cs->strxfrm_multiply ? "yes" : "no", cs->strxfrm_multiply, cs->strcoll, cs->strxfrm, cs->strnncoll, cs->strnxfrm, cs->like_range); printf("multi-byte: %3s (%d, %p, %p, %p)\n", cs->mbmaxlen ? "yes" : "no", cs->mbmaxlen, cs->ismbchar, cs->ismbhead, cs->mbcharlen); }