ctype.c 8.32 KB
Newer Older
unknown's avatar
unknown committed
1 2 3 4
/* Copyright (C) 2000 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
unknown's avatar
unknown committed
5
   the Free Software Foundation; version 2 of the License.
unknown's avatar
unknown committed
6 7

   This program is distributed in the hope that it will be useful,
unknown's avatar
unknown committed
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
unknown's avatar
unknown committed
9 10 11 12 13 14
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
unknown's avatar
unknown committed
15

unknown's avatar
unknown committed
16
#include <my_global.h>
unknown's avatar
unknown committed
17
#include <m_ctype.h>
18
#include <my_xml.h>
unknown's avatar
unknown committed
19
#ifndef SCO
20
#include <m_string.h>
unknown's avatar
unknown committed
21
#endif
unknown's avatar
unknown committed
22 23


unknown's avatar
unknown committed
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
/*

  This files implements routines which parse XML based
  character set and collation description files.
  
  Unicode collations are encoded according to
  
    Unicode Technical Standard #35
    Locale Data Markup Language (LDML)
    http://www.unicode.org/reports/tr35/
  
  and converted into ICU string according to
  
    Collation Customization
    http://oss.software.ibm.com/icu/userguide/Collate_Customization.html
  
*/
unknown's avatar
unknown committed
41

42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
static char *mstr(char *str,const char *src,uint l1,uint l2)
{
  l1= l1<l2 ? l1 : l2;
  memcpy(str,src,l1);
  str[l1]='\0';
  return str;
}

struct my_cs_file_section_st
{
  int        state;
  const char *str;
};

#define _CS_MISC	1
#define _CS_ID		2
#define _CS_CSNAME	3
#define _CS_FAMILY	4
#define _CS_ORDER	5
#define _CS_COLNAME	6
#define _CS_FLAG	7
#define _CS_CHARSET	8
#define _CS_COLLATION	9
#define _CS_UPPERMAP	10
#define _CS_LOWERMAP	11
#define _CS_UNIMAP	12
#define _CS_COLLMAP	13
#define _CS_CTYPEMAP	14
70 71
#define _CS_PRIMARY_ID	15
#define _CS_BINARY_ID	16
72
#define _CS_CSDESCRIPT	17
unknown's avatar
unknown committed
73 74 75 76 77
#define _CS_RESET	18
#define	_CS_DIFF1	19
#define	_CS_DIFF2	20
#define	_CS_DIFF3	21

78 79 80 81

static struct my_cs_file_section_st sec[] =
{
  {_CS_MISC,		"xml"},
82 83
  {_CS_MISC,		"xml/version"},
  {_CS_MISC,		"xml/encoding"},
84
  {_CS_MISC,		"charsets"},
85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110
  {_CS_MISC,		"charsets/max-id"},
  {_CS_CHARSET,		"charsets/charset"},
  {_CS_PRIMARY_ID,	"charsets/charset/primary-id"},
  {_CS_BINARY_ID,	"charsets/charset/binary-id"},
  {_CS_CSNAME,		"charsets/charset/name"},
  {_CS_FAMILY,		"charsets/charset/family"},
  {_CS_CSDESCRIPT,	"charsets/charset/description"},
  {_CS_MISC,		"charsets/charset/alias"},
  {_CS_MISC,		"charsets/charset/ctype"},
  {_CS_CTYPEMAP,	"charsets/charset/ctype/map"},
  {_CS_MISC,		"charsets/charset/upper"},
  {_CS_UPPERMAP,	"charsets/charset/upper/map"},
  {_CS_MISC,		"charsets/charset/lower"},
  {_CS_LOWERMAP,	"charsets/charset/lower/map"},
  {_CS_MISC,		"charsets/charset/unicode"},
  {_CS_UNIMAP,		"charsets/charset/unicode/map"},
  {_CS_COLLATION,	"charsets/charset/collation"},
  {_CS_COLNAME,		"charsets/charset/collation/name"},
  {_CS_ID,		"charsets/charset/collation/id"},
  {_CS_ORDER,		"charsets/charset/collation/order"},
  {_CS_FLAG,		"charsets/charset/collation/flag"},
  {_CS_COLLMAP,		"charsets/charset/collation/map"},
  {_CS_RESET,		"charsets/charset/collation/rules/reset"},
  {_CS_DIFF1,		"charsets/charset/collation/rules/p"},
  {_CS_DIFF2,		"charsets/charset/collation/rules/s"},
  {_CS_DIFF3,		"charsets/charset/collation/rules/t"},
111 112 113
  {0,	NULL}
};

114
static struct my_cs_file_section_st * cs_file_sec(const char *attr, size_t len)
115 116 117 118 119 120 121 122 123 124
{
  struct my_cs_file_section_st *s;
  for (s=sec; s->str; s++)
  {
    if (!strncmp(attr,s->str,len))
      return s;
  }
  return NULL;
}

125
#define MY_CS_CSDESCR_SIZE	64
126
#define MY_CS_TAILORING_SIZE	1024
127

128 129 130 131 132 133 134 135 136
typedef struct my_cs_file_info
{
  char   csname[MY_CS_NAME_SIZE];
  char   name[MY_CS_NAME_SIZE];
  uchar  ctype[MY_CS_CTYPE_TABLE_SIZE];
  uchar  to_lower[MY_CS_TO_LOWER_TABLE_SIZE];
  uchar  to_upper[MY_CS_TO_UPPER_TABLE_SIZE];
  uchar  sort_order[MY_CS_SORT_ORDER_TABLE_SIZE];
  uint16 tab_to_uni[MY_CS_TO_UNI_TABLE_SIZE];
137
  char   comment[MY_CS_CSDESCR_SIZE];
138 139
  char   tailoring[MY_CS_TAILORING_SIZE];
  size_t tailoring_length;
140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157
  CHARSET_INFO cs;
  int (*add_collation)(CHARSET_INFO *cs);
} MY_CHARSET_LOADER;



static int fill_uchar(uchar *a,uint size,const char *str, uint len)
{
  uint i= 0;
  const char *s, *b, *e=str+len;
  
  for (s=str ; s < e ; i++)
  { 
    for ( ; (s < e) && strchr(" \t\r\n",s[0]); s++) ;
    b=s;
    for ( ; (s < e) && !strchr(" \t\r\n",s[0]); s++) ;
    if (s == b || i > size)
      break;
158
    a[i]= (uchar) strtoul(b,NULL,16);
159 160 161 162
  }
  return 0;
}

163
static int fill_uint16(uint16 *a,uint size,const char *str, size_t len)
164 165
{
  uint i= 0;
166
  
167 168 169 170 171 172 173 174
  const char *s, *b, *e=str+len;
  for (s=str ; s < e ; i++)
  { 
    for ( ; (s < e) && strchr(" \t\r\n",s[0]); s++) ;
    b=s;
    for ( ; (s < e) && !strchr(" \t\r\n",s[0]); s++) ;
    if (s == b || i > size)
      break;
175
    a[i]= (uint16) strtol(b,NULL,16);
176 177 178 179 180
  }
  return 0;
}


181
static int cs_enter(MY_XML_PARSER *st,const char *attr, size_t len)
182 183 184 185 186 187
{
  struct my_cs_file_info *i= (struct my_cs_file_info *)st->user_data;
  struct my_cs_file_section_st *s= cs_file_sec(attr,len);
  
  if ( s && (s->state == _CS_CHARSET))
    bzero(&i->cs,sizeof(i->cs));
unknown's avatar
unknown committed
188 189
  
  if (s && (s->state == _CS_COLLATION))
190
    i->tailoring_length= 0;
unknown's avatar
unknown committed
191

192 193 194 195
  return MY_XML_OK;
}


196
static int cs_leave(MY_XML_PARSER *st,const char *attr, size_t len)
197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213
{
  struct my_cs_file_info *i= (struct my_cs_file_info *)st->user_data;
  struct my_cs_file_section_st *s= cs_file_sec(attr,len);
  int    state= s ? s->state : 0;
  int    rc;
  
  switch(state){
  case _CS_COLLATION:
    rc= i->add_collation ? i->add_collation(&i->cs) : MY_XML_OK;
    break;
  default:
    rc=MY_XML_OK;
  }
  return rc;
}


214
static int cs_value(MY_XML_PARSER *st,const char *attr, size_t len)
215 216 217
{
  struct my_cs_file_info *i= (struct my_cs_file_info *)st->user_data;
  struct my_cs_file_section_st *s;
218 219
  int    state= (int)((s=cs_file_sec(st->attr, strlen(st->attr))) ? s->state :
                      0);
220 221 222
  
  switch (state) {
  case _CS_ID:
223
    i->cs.number= strtol(attr,(char**)NULL,10);
224
    break;
225 226 227 228 229 230
  case _CS_BINARY_ID:
    i->cs.binary_number= strtol(attr,(char**)NULL,10);
    break;
  case _CS_PRIMARY_ID:
    i->cs.primary_number= strtol(attr,(char**)NULL,10);
    break;
231 232 233 234 235 236
  case _CS_COLNAME:
    i->cs.name=mstr(i->name,attr,len,MY_CS_NAME_SIZE-1);
    break;
  case _CS_CSNAME:
    i->cs.csname=mstr(i->csname,attr,len,MY_CS_NAME_SIZE-1);
    break;
237 238 239
  case _CS_CSDESCRIPT:
    i->cs.comment=mstr(i->comment,attr,len,MY_CS_CSDESCR_SIZE-1);
    break;
240 241 242
  case _CS_FLAG:
    if (!strncmp("primary",attr,len))
      i->cs.state|= MY_CS_PRIMARY;
243 244
    else if (!strncmp("binary",attr,len))
      i->cs.state|= MY_CS_BINSORT;
245 246
    else if (!strncmp("compiled",attr,len))
      i->cs.state|= MY_CS_COMPILED;
247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267
    break;
  case _CS_UPPERMAP:
    fill_uchar(i->to_upper,MY_CS_TO_UPPER_TABLE_SIZE,attr,len);
    i->cs.to_upper=i->to_upper;
    break;
  case _CS_LOWERMAP:
    fill_uchar(i->to_lower,MY_CS_TO_LOWER_TABLE_SIZE,attr,len);
    i->cs.to_lower=i->to_lower;
    break;
  case _CS_UNIMAP:
    fill_uint16(i->tab_to_uni,MY_CS_TO_UNI_TABLE_SIZE,attr,len);
    i->cs.tab_to_uni=i->tab_to_uni;
    break;
  case _CS_COLLMAP:
    fill_uchar(i->sort_order,MY_CS_SORT_ORDER_TABLE_SIZE,attr,len);
    i->cs.sort_order=i->sort_order;
    break;
  case _CS_CTYPEMAP:
    fill_uchar(i->ctype,MY_CS_CTYPE_TABLE_SIZE,attr,len);
    i->cs.ctype=i->ctype;
    break;
unknown's avatar
unknown committed
268 269 270 271 272 273 274 275 276 277 278 279
  case _CS_RESET:
  case _CS_DIFF1:
  case _CS_DIFF2:
  case _CS_DIFF3:
    {
      /*
        Convert collation description from
        Locale Data Markup Language (LDML)
        into ICU Collation Customization expression.
      */
      char arg[16];
      const char *cmd[]= {"&","<","<<","<<<"};
280
      i->cs.tailoring= i->tailoring;
unknown's avatar
unknown committed
281
      mstr(arg,attr,len,sizeof(arg)-1);
282
      if (i->tailoring_length + 20 < sizeof(i->tailoring))
unknown's avatar
unknown committed
283
      {
284 285
        char *dst= i->tailoring_length + i->tailoring;
        i->tailoring_length+= sprintf(dst," %s %s",cmd[state-_CS_RESET],arg);
unknown's avatar
unknown committed
286 287
      }
    }
288 289 290 291 292
  }
  return MY_XML_OK;
}


293 294
my_bool my_parse_charset_xml(const char *buf, size_t len,
                             int (*add_collation)(CHARSET_INFO *cs))
295 296 297 298 299 300 301 302 303 304 305 306 307 308 309
{
  MY_XML_PARSER p;
  struct my_cs_file_info i;
  my_bool rc;
  
  my_xml_parser_create(&p);
  my_xml_set_enter_handler(&p,cs_enter);
  my_xml_set_value_handler(&p,cs_value);
  my_xml_set_leave_handler(&p,cs_leave);
  i.add_collation= add_collation;
  my_xml_set_user_data(&p,(void*)&i);
  rc= (my_xml_parse(&p,buf,len) == MY_XML_OK) ? FALSE : TRUE;
  my_xml_parser_free(&p);
  return rc;
}