charset.c 14.3 KB
Newer Older
unknown's avatar
unknown committed
1 2 3 4 5 6 7 8
/* Copyright (C) 2000 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
unknown's avatar
unknown committed
16 17 18 19 20 21

#include "mysys_priv.h"
#include "mysys_err.h"
#include <m_ctype.h>
#include <m_string.h>
#include <my_dir.h>
unknown's avatar
unknown committed
22
#include <my_xml.h>
unknown's avatar
unknown committed
23

unknown's avatar
unknown committed
24

unknown's avatar
unknown committed
25

26 27 28 29 30 31 32 33 34
/*
  Record in cs->max_sort_char the byte value whose sort_order weight is the
  largest.  The scan starts from the weight of the currently stored
  max_sort_char and only replaces it when a strictly larger weight is found.
  Charsets without a sort_order table are left untouched.
*/
static void set_max_sort_char(CHARSET_INFO *cs)
{
  uchar best_weight;
  uint  code;

  if (cs->sort_order)
  {
    best_weight= cs->sort_order[(uchar) cs->max_sort_char];
    for (code= 0; code < 256; code++)
    {
      uchar weight= (uchar) cs->sort_order[code];
      if (weight > best_weight)
      {
        best_weight= weight;
        cs->max_sort_char= (char) code;
      }
    }
  }
}
unknown's avatar
unknown committed
44

unknown's avatar
unknown committed
45

46
/*
  Install the handler functions used for a simple one-byte-per-character
  character set and mark its maximum character length as 1.
*/
static void simple_cs_init_functions(CHARSET_INFO *cs)
{
  /* One byte per character */
  cs->mbmaxlen= 1;

  /* Collation, pattern matching and hashing */
  cs->strnncoll= my_strnncoll_simple;
  cs->strnxfrm= my_strnxfrm_simple;
  cs->like_range= my_like_range_simple;
  cs->wildcmp= my_wildcmp_8bit;
  cs->hash_caseup= my_hash_caseup_simple;
  cs->hash_sort= my_hash_sort_simple;

  /* Case conversion and case-insensitive comparison */
  cs->caseup_str= my_caseup_str_8bit;
  cs->casedn_str= my_casedn_str_8bit;
  cs->caseup= my_caseup_8bit;
  cs->casedn= my_casedn_8bit;
  cs->tosort= my_tosort_8bit;
  cs->strcasecmp= my_strcasecmp_8bit;
  cs->strncasecmp= my_strncasecmp_8bit;

  /* Conversion between bytes and wide characters */
  cs->mb_wc= my_mb_wc_8bit;
  cs->wc_mb= my_wc_mb_8bit;

  /* Formatting and string-to-number conversion */
  cs->snprintf= my_snprintf_8bit;
  cs->strntol= my_strntol_8bit;
  cs->strntoul= my_strntoul_8bit;
  cs->strntoll= my_strntoll_8bit;
  cs->strntoull= my_strntoull_8bit;
  cs->strntod= my_strntod_8bit;
}

unknown's avatar
unknown committed
72 73 74 75 76

/* Per-plane bookkeeping used while building the Unicode -> charset tables */
typedef struct
{
  int        nchars;  /* number of charset characters that fall in the plane */
  MY_UNI_IDX uidx;    /* lowest/highest code seen in the plane and its table */
} uni_idx;

/* Loop bound used when walking the entries of a charset's tab_to_uni table */
#define PLANE_SIZE	0x100
/* Number of planes, i.e. the size of the per-plane statistics array */
#define PLANE_NUM	0x100
/* Plane index of code x: bits above the low 8, reduced modulo PLANE_NUM */
#define PLANE_NUMBER(x)	(((x)>>8) % PLANE_NUM)

static int pcmp(const void * f, const void * s)
{
unknown's avatar
unknown committed
85 86
  const uni_idx *F= (const uni_idx*) f;
  const uni_idx *S= (const uni_idx*) s;
87 88
  int res;

unknown's avatar
unknown committed
89
  if (!(res=((S->nchars)-(F->nchars))))
90 91 92 93
    res=((F->uidx.from)-(S->uidx.to));
  return res;
}

unknown's avatar
unknown committed
94 95 96

/*
  Build cs->tab_from_uni (Unicode -> charset byte) from cs->tab_to_uni.

  The codes found in cs->tab_to_uni are grouped by plane (PLANE_NUMBER).
  For every non-empty plane a table covering the range [from..to] of codes
  seen in that plane is allocated with my_once_alloc() and filled with the
  byte that maps to each code.  The resulting list is ordered by pcmp() and
  terminated by an all-zero MY_UNI_IDX entry.

  cs->tab_to_uni must be non-NULL.

  RETURN
    FALSE  ok
    TRUE   memory allocation failed; cs->tab_from_uni is left unchanged
*/
static my_bool create_fromuni(CHARSET_INFO *cs)
{
  uni_idx     idx[PLANE_NUM];
  MY_UNI_IDX *from_uni;
  int         i,n;

  /* Clear plane statistics */
  bzero(idx,sizeof(idx));

  /* Count number of characters in each plane */
  for (i=0; i < PLANE_SIZE; i++)
  {
    uint16 wc=cs->tab_to_uni[i];
    int pl= PLANE_NUMBER(wc);

    /* A zero code means "no mapping", except for byte 0 itself */
    if (wc || !i)
    {
      if (!idx[pl].nchars)
      {
        idx[pl].uidx.from=wc;
        idx[pl].uidx.to=wc;
      }
      else
      {
        idx[pl].uidx.from=wc<idx[pl].uidx.from?wc:idx[pl].uidx.from;
        idx[pl].uidx.to=wc>idx[pl].uidx.to?wc:idx[pl].uidx.to;
      }
      idx[pl].nchars++;
    }
  }

  /* Sort planes in descending order */
  qsort(&idx,PLANE_NUM,sizeof(uni_idx),&pcmp);

  /* Allocate and fill reverse table for each plane */
  for (i=0; i < PLANE_NUM; i++)
  {
    int ch,numchars;

    /* Empty planes are sorted last, so the first one ends the list */
    if (!idx[i].nchars)
      break;

    numchars=idx[i].uidx.to-idx[i].uidx.from+1;
    if (!(idx[i].uidx.tab=(unsigned char*)
          my_once_alloc(numchars * sizeof(*idx[i].uidx.tab), MYF(MY_WME))))
      return TRUE;
    bzero(idx[i].uidx.tab,numchars*sizeof(*idx[i].uidx.tab));

    /* Byte 0 is skipped: the table is already zero-filled */
    for (ch=1; ch < PLANE_SIZE; ch++)
    {
      uint16 wc=cs->tab_to_uni[ch];
      if (wc >= idx[i].uidx.from && wc <= idx[i].uidx.to && wc)
      {
        int ofs= wc - idx[i].uidx.from;
        idx[i].uidx.tab[ofs]= ch;
      }
    }
  }

  /* Copy the used planes into the charset, plus one end-of-list entry */
  n=i;
  if (!(from_uni= (MY_UNI_IDX*) my_once_alloc(sizeof(MY_UNI_IDX)*(n+1),
                                              MYF(MY_WME))))
    return TRUE;
  for (i=0; i< n; i++)
    from_uni[i]= idx[i].uidx;

  /* Set end-of-list marker */
  bzero(&from_uni[i],sizeof(MY_UNI_IDX));
  cs->tab_from_uni= from_uni;
  return FALSE;
}

unknown's avatar
unknown committed
164

165 166 167 168 169 170 171 172 173 174 175 176 177
/*
  Merge the data present in "from" into "to".

  The number is taken from "from" only when it is non-zero, the state flags
  are OR-ed together, and every string/table that is set in "from" is
  duplicated with the my_once_* allocators and stored in "to".  Copying
  sort_order also recomputes to->max_sort_char; copying tab_to_uni also
  rebuilds to->tab_from_uni.

  Allocation failures are not reported to the caller: the affected member of
  "to" ends up NULL, which simple_cs_is_full() treats as missing for the
  members it checks.
*/
static void simple_cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
{
  to->number= from->number ? from->number : to->number;
  to->state|= from->state;

  if (from->csname)
    to->csname= my_once_strdup(from->csname,MYF(MY_WME));

  if (from->name)
    to->name= my_once_strdup(from->name,MYF(MY_WME));

  if (from->ctype)
    to->ctype= (uchar*) my_once_memdup((char*) from->ctype,
                                       MY_CS_CTYPE_TABLE_SIZE, MYF(MY_WME));
  if (from->to_lower)
    to->to_lower= (uchar*) my_once_memdup((char*) from->to_lower,
                                          MY_CS_TO_LOWER_TABLE_SIZE,
                                          MYF(MY_WME));
  if (from->to_upper)
    to->to_upper= (uchar*) my_once_memdup((char*) from->to_upper,
                                          MY_CS_TO_UPPER_TABLE_SIZE,
                                          MYF(MY_WME));
  if (from->sort_order)
  {
    to->sort_order= (uchar*) my_once_memdup((char*) from->sort_order,
                                            MY_CS_SORT_ORDER_TABLE_SIZE,
                                            MYF(MY_WME));
    /* set_max_sort_char() is a no-op when the copy failed */
    set_max_sort_char(to);
  }
  if (from->tab_to_uni)
  {
    uint sz= MY_CS_TO_UNI_TABLE_SIZE*sizeof(uint16);
    to->tab_to_uni= (uint16*)  my_once_memdup((char*)from->tab_to_uni, sz,
                                             MYF(MY_WME));
    /* create_fromuni() reads to->tab_to_uni, so skip it if the copy failed */
    if (to->tab_to_uni)
      (void) create_fromuni(to);
  }
}


/*
  Tell whether a charset has everything needed to be used: its identity
  (csname, name, number) and all of its tables (tab_to_uni, ctype, to_upper,
  to_lower, sort_order).  Returns non-zero when all of them are set.
*/
static my_bool simple_cs_is_full(CHARSET_INFO *cs)
{
  int has_identity= (cs->csname && cs->name && cs->number);
  int has_tables= (cs->tab_to_uni && cs->ctype && cs->to_upper &&
                   cs->to_lower && cs->sort_order);

  return has_identity && has_tables;
}


/*
  Callback passed to my_parse_charset_xml(): merge the collation described by
  "cs" into all_charsets[].

  A collation is processed only if it has a name and a number; when the
  number is missing it is looked up by name with get_charset_number().
  The slot in all_charsets[] is allocated on first use.  Data is merged only
  into slots that are not flagged MY_CS_COMPILED, and a slot is flagged
  MY_CS_LOADED and given its handler functions once simple_cs_is_full()
  says it is complete.

  Numbers that do not fit in all_charsets[] are ignored instead of being used
  as an index.

  After a collation has been handled, its number, name, state and sort_order
  are cleared in "cs"; the other members are left as they are.

  NOTE(review): get_charset_number() calls init_available_charsets().  When
  this callback runs from inside init_available_charsets() (lock held,
  charset_initialized still 0) a collation without a number would re-enter
  that locked section - confirm this cannot happen with the shipped files.

  RETURN
    MY_XML_OK     ok (also when the collation was skipped)
    MY_XML_ERROR  could not allocate a new all_charsets[] entry
*/
static int add_collation(CHARSET_INFO *cs)
{
  if (!cs->name ||
      !(cs->number || (cs->number=get_charset_number(cs->name))))
    return MY_XML_OK;

  if (cs->number < (uint) (sizeof(all_charsets)/sizeof(all_charsets[0])))
  {
    if (!all_charsets[cs->number])
    {
      if (!(all_charsets[cs->number]=
         (CHARSET_INFO*) my_once_alloc(sizeof(CHARSET_INFO),MYF(0))))
        return MY_XML_ERROR;
      bzero((void*)all_charsets[cs->number],sizeof(CHARSET_INFO));
    }

    if (!(all_charsets[cs->number]->state & MY_CS_COMPILED))
    {
      simple_cs_copy_data(all_charsets[cs->number],cs);
      if (simple_cs_is_full(all_charsets[cs->number]))
      {
        simple_cs_init_functions(all_charsets[cs->number]);
        all_charsets[cs->number]->state |= MY_CS_LOADED;
      }
    }
  }

  /* Forget the per-collation members before the next collation is parsed */
  cs->number= 0;
  cs->name= NULL;
  cs->state= 0;
  cs->sort_order= NULL;
  return MY_XML_OK;
}


/*
  Size in bytes of the buffer a charset definition file is read into.
  Parenthesized so that the macro expands safely inside larger expressions.
*/
#define MAX_BUF (1024*16)
/* Name of the charset index file inside the charsets directory */
#define MY_CHARSET_INDEX "Index.xml"

/* When non-NULL, get_charsets_dir() uses this directory as given */
const char *charsets_dir= NULL;
/* Set to 1 once init_available_charsets() has filled all_charsets[] */
static int charset_initialized=0;


/*
  Read a charset definition file and feed it to the XML parser, which calls
  add_collation() for the collations it finds.

  At most MAX_BUF bytes of the file are read.  A failed read() is treated as
  an error instead of being converted to a huge unsigned length, and a failed
  buffer allocation is reported as an error as well.

  Errors reported by my_parse_charset_xml() are ignored.

  RETURN
    FALSE  file was read and handed to the parser
    TRUE   out of memory, or the file could not be opened or read
*/
static my_bool my_read_charset_file(const char *filename, myf myflags)
{
  char *buf;
  int  fd;
  int  nread;

  if (!(buf= (char *)my_malloc(MAX_BUF,myflags)))
    return TRUE;

  if ((fd=my_open(filename,O_RDONLY,myflags)) < 0)
  {
    my_free(buf,myflags);
    return TRUE;
  }
  nread= (int) read(fd,buf,MAX_BUF);
  my_close(fd,myflags);

  if (nread < 0)
  {
    my_free(buf,myflags);
    return TRUE;
  }

  if (my_parse_charset_xml(buf,(uint) nread,add_collation))
  {
#ifdef NOT_YET
    printf("ERROR at line %d pos %d '%s'\n",
	   my_xml_error_lineno(&p)+1,
	   my_xml_error_pos(&p),
	   my_xml_error_string(&p));
#endif
  }

  my_free(buf, myflags);
  return FALSE;
}


/*
  Store the name of the charsets directory in buf and return a pointer to
  the end of the stored string, so that a file name can be appended there.

  If charsets_dir is set it is used as is (at most FN_REFLEN-1 characters).
  Otherwise the directory is built from SHAREDIR and CHARSET_DIR, prefixed
  with DEFAULT_CHARSET_HOME unless SHAREDIR is a hard path or already starts
  with DEFAULT_CHARSET_HOME.  The result is passed through convert_dirname().
*/
char *get_charsets_dir(char *buf)
{
  const char *sharedir= SHAREDIR;
  DBUG_ENTER("get_charsets_dir");

  if (charsets_dir == NULL)
  {
    int sharedir_is_complete= (test_if_hard_path(sharedir) ||
                               is_prefix(sharedir, DEFAULT_CHARSET_HOME));
    if (sharedir_is_complete)
      strxmov(buf, sharedir, "/", CHARSET_DIR, NullS);
    else
      strxmov(buf, DEFAULT_CHARSET_HOME, "/", sharedir, "/", CHARSET_DIR,
              NullS);
  }
  else
    strmake(buf, charsets_dir, FN_REFLEN-1);

  convert_dirname(buf,buf,NullS);
  DBUG_PRINT("info",("charsets dir: '%s'", buf));
  DBUG_RETURN(strend(buf));
}

/*
  Fill all_charsets[] on first use: register the compiled-in charsets,
  compute max_sort_char for each of them and then read the charset index
  file from the charsets directory.

  The work is done under THR_LOCK_charset.  The flag is tested again after
  the lock has been taken so that a thread which was waiting for the lock
  while another thread did the initialization does not clear and rebuild
  all_charsets[] a second time.

  RETURN
    FALSE  already initialized, or the index file was read
    TRUE   the index file could not be read
*/
static my_bool init_available_charsets(myf myflags)
{
  char fname[FN_REFLEN];
  my_bool error=FALSE;
  /*
    We have to use charset_initialized to not lock on THR_LOCK_charset
    inside get_internal_charset...
  */
  if (!charset_initialized)
  {
    /*
      To make things thread safe we do not allow other threads to interfere
      while we may be changing all_charsets
    */
    pthread_mutex_lock(&THR_LOCK_charset);

    if (!charset_initialized)
    {
      CHARSET_INFO **cs;

      bzero(&all_charsets,sizeof(all_charsets));
      init_compiled_charsets(myflags);

      /* Compute max_sort_char for the compiled charsets */
      for (cs=all_charsets; cs < all_charsets+255 ; cs++)
      {
        if (*cs)
          set_max_sort_char(*cs);
      }

      strmov(get_charsets_dir(fname), MY_CHARSET_INDEX);
      error= my_read_charset_file(fname,myflags);
      charset_initialized=1;
    }
    pthread_mutex_unlock(&THR_LOCK_charset);
  }
  return error;
}


/*
  Clear the charset_initialized flag, so that the next call to
  init_available_charsets() rebuilds all_charsets[].
  This function itself releases no memory.
*/
void free_charsets(void)
{
  charset_initialized=0;
}


/*
  Build in buf the name "<charsets directory><cs_name>.conf".
*/
static void get_charset_conf_name(const char *cs_name, char *buf)
{
  char *dir_end= get_charsets_dir(buf);

  strxmov(dir_end, cs_name, ".conf", NullS);
}


349 350
uint get_charset_number(const char *charset_name)
{
351
  CHARSET_INFO **cs;
352 353 354
  if (init_available_charsets(MYF(0)))	/* If it isn't initialized */
    return 0;
  
unknown's avatar
unknown committed
355 356
  for (cs= all_charsets; cs < all_charsets+255; ++cs)
  {
357 358
    if ( cs[0] && cs[0]->name && !strcmp(cs[0]->name, charset_name))
      return cs[0]->number;
unknown's avatar
unknown committed
359
  }  
360 361 362 363 364 365 366 367 368 369
  return 0;   /* this mimics find_type() */
}


/*
  Return the name of the charset with the given number.

  Numbers that do not fit in all_charsets[] are treated as unknown instead
  of being used as an index.

  RETURN
    the charset name, or "?" if the number is unknown or
    init_available_charsets() reported an error
*/
const char *get_charset_name(uint charset_number)
{
  CHARSET_INFO *cs;
  if (init_available_charsets(MYF(0)))	/* If it isn't initialized */
    return "?";

  if (charset_number >= (uint) (sizeof(all_charsets)/sizeof(all_charsets[0])))
    return "?";

  cs=all_charsets[charset_number];
  if (cs && (cs->number == charset_number) && cs->name )
    return cs->name;

  return "?";   /* this mimics find_type() */
}


378
/*
  Return the all_charsets[] entry for cs_number, loading its definition from
  "<charsets directory><csname>.xml" when the entry is neither compiled in
  nor loaded yet.

  Numbers that do not fit in all_charsets[] are rejected instead of being
  used as an index.

  RETURN
    the charset, or NULL if the number is out of range, the slot is empty,
    or the charset is still not flagged MY_CS_LOADED after reading its file
*/
static CHARSET_INFO *get_internal_charset(uint cs_number, myf flags)
{
  char  buf[FN_REFLEN];
  CHARSET_INFO *cs;

  if (cs_number >= (uint) (sizeof(all_charsets)/sizeof(all_charsets[0])))
    return NULL;

  /*
    To make things thread safe we do not allow other threads to interfere
    while we may be changing all_charsets
  */
  pthread_mutex_lock(&THR_LOCK_charset);

  cs= all_charsets[cs_number];

  if (cs && !(cs->state & (MY_CS_COMPILED | MY_CS_LOADED)))
  {
     strxmov(get_charsets_dir(buf), cs->csname, ".xml", NullS);
     /* The result is judged by the MY_CS_LOADED flag, not the return value */
     my_read_charset_file(buf,flags);
     cs= (cs->state & MY_CS_LOADED) ? cs : NULL;
  }
  pthread_mutex_unlock(&THR_LOCK_charset);
  return cs;
}


/*
  Get a charset by number.

  Number 0 always yields NULL without an error message.  For any other
  number that cannot be resolved, EE_UNKNOWN_CHARSET is reported through
  my_error() when MY_WME is set in flags; the charset is shown as "#<number>"
  together with the path of the index file.

  RETURN
    the charset, or NULL if it is not available
*/
CHARSET_INFO *get_charset(uint cs_number, myf flags)
{
  CHARSET_INFO *found;

  (void) init_available_charsets(MYF(0));	/* If it isn't initialized */

  if (!cs_number)
    return NULL;

  found= get_internal_charset(cs_number, flags);
  if (found || !(flags & MY_WME))
    return found;

  {
    char index_file[FN_REFLEN], cs_string[23];

    strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX);
    cs_string[0]='#';
    int10_to_str(cs_number, cs_string+1, 10);
    my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_string, index_file);
  }
  return found;
}

/*
  Make the charset with number cs both default_charset_info and
  system_charset_info.

  RETURN
    FALSE  ok
    TRUE   get_charset() could not provide the charset; nothing was changed
*/
my_bool set_default_charset(uint cs, myf flags)
{
  CHARSET_INFO *chosen;
  DBUG_ENTER("set_default_charset");
  DBUG_PRINT("enter",("character set: %d",(int) cs));

  chosen= get_charset(cs, flags);
  if (chosen)
  {
    default_charset_info= chosen;
    system_charset_info= chosen;
    DBUG_RETURN(FALSE);
  }

  DBUG_PRINT("error",("Couldn't set default character set"));
  DBUG_RETURN(TRUE);   /* error */
}

/*
  Get a charset by the name stored in its "name" member.

  When the charset cannot be resolved and MY_WME is set in flags,
  EE_UNKNOWN_CHARSET is reported through my_error() with the requested name
  and the path of the index file.

  RETURN
    the charset, or NULL if it is not available
*/
CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags)
{
  CHARSET_INFO *found= NULL;
  uint number;

  (void) init_available_charsets(MYF(0));	/* If it isn't initialized */

  number= get_charset_number(cs_name);
  if (number)
    found= get_internal_charset(number, flags);

  if (!found && (flags & MY_WME))
  {
    char index_file[FN_REFLEN];

    strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX);
    my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_name, index_file);
  }

  return found;
}


/*
  Get the charset flagged MY_CS_PRIMARY whose "csname" member equals cs_name.

  Only the first matching entry of all_charsets[] is considered.  When no
  charset can be returned and MY_WME is set in flags, EE_UNKNOWN_CHARSET is
  reported through my_error() with the requested name and the path of the
  index file.

  RETURN
    the charset, or NULL if it is not available
*/
CHARSET_INFO *get_charset_by_csname(const char *cs_name, myf flags)
{
  CHARSET_INFO *found= NULL;
  uint idx;

  (void) init_available_charsets(MYF(0));	/* If it isn't initialized */

  for (idx= 0; idx < 255; idx++)
  {
    CHARSET_INFO *cand= all_charsets[idx];

    if (!cand || !(cand->state & MY_CS_PRIMARY) ||
        !cand->csname || strcmp(cand->csname, cs_name))
      continue;

    if (cand->number)
      found= get_internal_charset(cand->number, flags);
    break;
  }

  if (!found && (flags & MY_WME))
  {
    char index_file[FN_REFLEN];

    strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX);
    my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_name, index_file);
  }

  return found;
}

unknown's avatar
unknown committed
486

unknown's avatar
unknown committed
487 488
/*
  Make the charset named cs_name both default_charset_info and
  system_charset_info.

  RETURN
    FALSE  ok
    TRUE   get_charset_by_name() could not provide the charset; nothing was
           changed
*/
my_bool set_default_charset_by_name(const char *cs_name, myf flags)
{
  CHARSET_INFO *chosen;
  DBUG_ENTER("set_default_charset_by_name");
  DBUG_PRINT("enter",("character set: %s", cs_name));

  chosen= get_charset_by_name(cs_name, flags);
  if (chosen)
  {
    default_charset_info= chosen;
    system_charset_info= chosen;
    DBUG_RETURN(FALSE);
  }

  DBUG_PRINT("error",("Couldn't set default character set"));
  DBUG_RETURN(TRUE);   /* error */
}

unknown's avatar
unknown committed
505

unknown's avatar
unknown committed
506 507 508 509
/*
  Check whether name is already one of the space separated words in s.

  A match must be a whole word: it has to start at the beginning of the
  string or right after a space, and be followed by a space or the end of
  the string.  Without the check on the preceding character, a name would
  also match as the tail of a longer name.

  RETURN
    TRUE   name is present in s
    FALSE  name is not present in s
*/

static my_bool charset_in_string(const char *name, DYNAMIC_STRING *s)
{
  uint length= (uint) strlen(name);
  const char *pos;
  for (pos=s->str ; (pos=strstr(pos,name)) ; pos++)
  {
    if ((pos == s->str || pos[-1] == ' ') &&
        (! pos[length] || pos[length] == ' '))
      return TRUE;				/* Already existed */
  }
  return FALSE;
}

unknown's avatar
unknown committed
520

unknown's avatar
unknown committed
521 522
/*
  Append name followed by a space to s, unless charset_in_string() reports
  that the name is already there.
*/
static void charset_append(DYNAMIC_STRING *s, const char *name)
{
  if (charset_in_string(name, s))
    return;

  dynstr_append(s, name);
  dynstr_append(s, " ");
}


/* Returns a dynamically-allocated string listing the character sets
   requested.  The caller is responsible for freeing the memory. */

unknown's avatar
unknown committed
534
char *list_charsets(myf want_flags)
unknown's avatar
unknown committed
535 536 537 538
{
  DYNAMIC_STRING s;
  char *p;

539
  (void)init_available_charsets(MYF(0));
unknown's avatar
unknown committed
540 541
  init_dynamic_string(&s, NullS, 256, 1024);

542
  if (want_flags & MY_CS_COMPILED)
unknown's avatar
unknown committed
543
  {
544
    CHARSET_INFO **cs;
unknown's avatar
unknown committed
545
    for (cs= all_charsets; cs < all_charsets+255; cs++)
unknown's avatar
unknown committed
546
    {
547 548 549 550 551
      if (cs[0])
      {
        dynstr_append(&s, cs[0]->name);
        dynstr_append(&s, " ");
      }
unknown's avatar
unknown committed
552 553 554
    }
  }

555
  if (want_flags & MY_CS_CONFIG)
unknown's avatar
unknown committed
556
  {
557
    CHARSET_INFO **cs;
unknown's avatar
unknown committed
558
    char buf[FN_REFLEN];
559
    MY_STAT status;
unknown's avatar
unknown committed
560

561 562
    for (cs=all_charsets; cs < all_charsets+255; cs++)
    {
563
      if (!cs[0] || !cs[0]->name || charset_in_string(cs[0]->name, &s))
564
	continue;
565
      get_charset_conf_name(cs[0]->name, buf);
566 567
      if (!my_stat(buf, &status, MYF(0)))
	continue;       /* conf file doesn't exist */
568
      dynstr_append(&s, cs[0]->name);
569 570
      dynstr_append(&s, " ");
    }
unknown's avatar
unknown committed
571 572
  }

unknown's avatar
unknown committed
573
  if (want_flags & (MY_CS_INDEX|MY_CS_LOADED))
unknown's avatar
unknown committed
574
  {
575
    CHARSET_INFO **cs;
unknown's avatar
unknown committed
576
    for (cs= all_charsets; cs < all_charsets + 255; cs++)
577 578
      if (cs[0] && cs[0]->name && (cs[0]->state & want_flags) )
        charset_append(&s, cs[0]->name);
unknown's avatar
unknown committed
579
  }
unknown's avatar
unknown committed
580 581
  
  if (s.length)
unknown's avatar
unknown committed
582
  {
unknown's avatar
unknown committed
583 584
    s.str[s.length - 1]= '\0';   /* chop trailing space */
    p= my_strdup(s.str, MYF(MY_WME));
unknown's avatar
unknown committed
585 586 587
  }
  else
  {
unknown's avatar
unknown committed
588
    p= my_strdup("", MYF(MY_WME));
unknown's avatar
unknown committed
589 590
  }
  dynstr_free(&s);
unknown's avatar
unknown committed
591
  
unknown's avatar
unknown committed
592 593
  return p;
}