/* Copyright (C) 2000 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

/*
  A better implementation of the UNIX ctype(3) library.
  Notes:   my_global.h should be included before ctype.h
*/

#ifndef _m_ctype_h
#define _m_ctype_h

#ifdef	__cplusplus
extern "C" {
#endif

/*
  Fixed sizes for charset names and per-charset lookup tables.
  The ctype table is one entry larger than the others: the my_is*()
  macros in this header index it as (ctype + 1)[(uchar) c].
*/
#define MY_CS_NAME_SIZE             32
#define MY_CS_CTYPE_TABLE_SIZE      257
#define MY_CS_TO_LOWER_TABLE_SIZE   256
#define MY_CS_TO_UPPER_TABLE_SIZE   256
#define MY_CS_SORT_ORDER_TABLE_SIZE 256
#define MY_CS_TO_UNI_TABLE_SIZE     256

/*
  Relative directory name, including the trailing slash.
  NOTE(review): presumably where charset definition files are looked up;
  confirm against the charset loader.
*/
#define CHARSET_DIR "charsets/"

/*
  Wide-character type used by the mb_wc()/wc_mb() callbacks and the
  my_mb_wc_8bit()/my_wc_mb_8bit() functions declared in this header.
  This is a macro alias for ulong, not a typedef.
*/
#define my_wc_t ulong

unknown's avatar
unknown committed
40 41
typedef struct unicase_info_st
{
unknown's avatar
unknown committed
42 43 44 45 46 47 48 49 50 51
  uint16 toupper;
  uint16 tolower;
  uint16 sort;
} MY_UNICASE_INFO;

/*
  Integer result codes.
  NOTE(review): presumably returned by the mb_wc()/wc_mb() conversion
  callbacks; the per-code meanings below are inferred from the names and
  should be confirmed against the implementations.
*/
#define MY_CS_ILSEQ     0       /* presumably: illegal byte sequence       */
#define MY_CS_ILUNI     0       /* presumably: character not representable */
#define MY_CS_TOOSMALL  (-1)    /* presumably: buffer too small            */
/* Yields -1-n; MY_CS_TOOFEW(0) has the same value as MY_CS_TOOSMALL. */
#define MY_CS_TOOFEW(n) (-1-(n))

/* NOTE(review): presumably selectors for the 'sq' argument of scan(). */
#define MY_SEQ_INTTAIL  1
#define MY_SEQ_SPACES   2

/*
  My charsets_list flags.  Each flag is a distinct single bit so that
  several can be OR-ed together; my_binary_compare() and use_strnxfrm()
  test two of them against CHARSET_INFO::state.
*/
#define MY_CS_COMPILED  1      /* compiled-in sets               */
#define MY_CS_CONFIG    2      /* sets that have a *.conf file   */
#define MY_CS_INDEX     4      /* sets listed in the Index file  */
#define MY_CS_LOADED    8      /* sets that are currently loaded */
#define MY_CS_BINSORT   16     /* if binary sort order           */
#define MY_CS_PRIMARY   32     /* if primary collation           */
#define MY_CS_STRNXFRM  64     /* if strnxfrm is used for sort   */
#define MY_CS_UNICODE   128    /* if a charset is full unicode   */
#define MY_CS_READY     256    /* if a charset is initialized    */
#define MY_CS_AVAILABLE 512    /* If either compiled-in or loaded*/

#define MY_CHARSET_UNDEFINED 0

69

unknown's avatar
unknown committed
70 71
typedef struct my_uni_idx_st
{
72 73 74 75
  uint16 from;
  uint16 to;
  uchar  *tab;
} MY_UNI_IDX;
unknown's avatar
unknown committed
76

77 78 79 80 81 82
typedef struct
{
  uint beg;
  uint end;
  uint mblen;
} my_match_t;
/*
  State identifiers, numbered implicitly from 0 in declaration order;
  do not reorder, since the numeric values follow the order.
  NOTE(review): presumably the values stored in CHARSET_INFO::state_map
  and consumed by the SQL lexer; confirm against the users of state_map.
*/
enum my_lex_states
{
  MY_LEX_START,
  MY_LEX_CHAR,
  MY_LEX_IDENT,
  MY_LEX_IDENT_SEP,
  MY_LEX_IDENT_START,
  MY_LEX_REAL,
  MY_LEX_HEX_NUMBER,
  MY_LEX_CMP_OP,
  MY_LEX_LONG_CMP_OP,
  MY_LEX_STRING,
  MY_LEX_COMMENT,
  MY_LEX_END,
  MY_LEX_OPERATOR_OR_IDENT,
  MY_LEX_NUMBER_IDENT,
  MY_LEX_INT_OR_REAL,
  MY_LEX_REAL_OR_POINT,
  MY_LEX_BOOL,
  MY_LEX_EOL,
  MY_LEX_ESCAPE,
  MY_LEX_LONG_COMMENT,
  MY_LEX_END_LONG_COMMENT,
  MY_LEX_SEMICOLON,
  MY_LEX_SET_VAR,
  MY_LEX_USER_END,
  MY_LEX_HOSTNAME,
  MY_LEX_SKIP,
  MY_LEX_USER_VARIABLE_DELIMITER,
  MY_LEX_SYSTEM_VAR,
  MY_LEX_IDENT_OR_KEYWORD,
  MY_LEX_IDENT_OR_HEX,
  MY_LEX_IDENT_OR_BIN,
  MY_LEX_IDENT_OR_NCHAR,
  MY_LEX_STRING_OR_DELIMITER
};

100
struct charset_info_st;
101

102
typedef struct my_collation_handler_st
unknown's avatar
unknown committed
103
{
104
  my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint));
105 106
  /* Collation routines */
  int     (*strnncoll)(struct charset_info_st *,
107
		       const uchar *, uint, const uchar *, uint, my_bool);
108 109
  int     (*strnncollsp)(struct charset_info_st *,
		       const uchar *, uint, const uchar *, uint);
110 111 112
  int     (*strnxfrm)(struct charset_info_st *,
		      uchar *, uint, const uchar *, uint);
  my_bool (*like_range)(struct charset_info_st *,
113
			const char *s, uint s_length,
unknown's avatar
unknown committed
114
			pchar w_prefix, pchar w_one, pchar w_many, 
115 116 117
			uint res_length,
			char *min_str, char *max_str,
			uint *min_len, uint *max_len);
118 119 120 121
  int     (*wildcmp)(struct charset_info_st *,
  		     const char *str,const char *str_end,
                     const char *wildstr,const char *wildend,
                     int escape,int w_one, int w_many);
122 123

  int  (*strcasecmp)(struct charset_info_st *, const char *, const char *);
124
  
125
  uint (*instr)(struct charset_info_st *,
unknown's avatar
unknown committed
126 127
                const char *b, uint b_length,
                const char *s, uint s_length,
128
                my_match_t *match, uint nmatch);
129
  
130 131 132 133 134
  /* Hash calculation */
  void (*hash_sort)(struct charset_info_st *cs, const uchar *key, uint len,
		    ulong *nr1, ulong *nr2); 
} MY_COLLATION_HANDLER;

/* Collation handler tables defined elsewhere. */
extern MY_COLLATION_HANDLER my_collation_mb_bin_handler;
extern MY_COLLATION_HANDLER my_collation_8bit_bin_handler;
extern MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler;
extern MY_COLLATION_HANDLER my_collation_ucs2_uca_handler;
139 140 141 142


typedef struct my_charset_handler_st
{
143
  my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint));
144 145 146
  /* Multibyte routines */
  int     (*ismbchar)(struct charset_info_st *, const char *, const char *);
  int     (*mbcharlen)(struct charset_info_st *, uint);
147 148
  uint    (*numchars)(struct charset_info_st *, const char *b, const char *e);
  uint    (*charpos)(struct charset_info_st *, const char *b, const char *e, uint pos);
149
  uint    (*well_formed_len)(struct charset_info_st *,
unknown's avatar
unknown committed
150
  			   const char *b,const char *e, uint nchars);
unknown's avatar
unknown committed
151
  uint    (*lengthsp)(struct charset_info_st *, const char *ptr, uint length);
152
  
153 154 155 156 157
  /* Unicode convertion */
  int (*mb_wc)(struct charset_info_st *cs,my_wc_t *wc,
	       const unsigned char *s,const unsigned char *e);
  int (*wc_mb)(struct charset_info_st *cs,my_wc_t wc,
	       unsigned char *s,unsigned char *e);
158
  
159
  /* Functions for case and sort convertion */
160 161 162 163
  void    (*caseup_str)(struct charset_info_st *, char *);
  void    (*casedn_str)(struct charset_info_st *, char *);
  void    (*caseup)(struct charset_info_st *, char *, uint);
  void    (*casedn)(struct charset_info_st *, char *, uint);
164
  
165
  /* Charset dependant snprintf() */
unknown's avatar
unknown committed
166 167 168 169 170 171
  int  (*snprintf)(struct charset_info_st *, char *to, uint n, const char *fmt,
		   ...);
  int  (*long10_to_str)(struct charset_info_st *, char *to, uint n, int radix,
			long int val);
  int (*longlong10_to_str)(struct charset_info_st *, char *to, uint n,
			   int radix, longlong val);
172
  
173 174
  void (*fill)(struct charset_info_st *, char *to, uint len, int fill);
  
175
  /* String-to-number convertion routines */
unknown's avatar
unknown committed
176 177 178 179 180 181 182 183 184 185
  long        (*strntol)(struct charset_info_st *, const char *s, uint l,
			 int base, char **e, int *err);
  ulong      (*strntoul)(struct charset_info_st *, const char *s, uint l,
			 int base, char **e, int *err);
  longlong   (*strntoll)(struct charset_info_st *, const char *s, uint l,
			 int base, char **e, int *err);
  ulonglong (*strntoull)(struct charset_info_st *, const char *s, uint l,
			 int base, char **e, int *err);
  double      (*strntod)(struct charset_info_st *, char *s, uint l, char **e,
			 int *err);
186
  
unknown's avatar
unknown committed
187 188
  ulong        (*scan)(struct charset_info_st *, const char *b, const char *e,
		       int sq);
189 190 191
} MY_CHARSET_HANDLER;

/* Charset handler tables defined elsewhere. */
extern MY_CHARSET_HANDLER my_charset_8bit_handler;
extern MY_CHARSET_HANDLER my_charset_ucs2_handler;
193 194 195 196 197 198 199 200 201 202 203


typedef struct charset_info_st
{
  uint      number;
  uint      primary_number;
  uint      binary_number;
  uint      state;
  const char *csname;
  const char *name;
  const char *comment;
204
  const char *tailoring;
205 206 207 208
  uchar    *ctype;
  uchar    *to_lower;
  uchar    *to_upper;
  uchar    *sort_order;
209
  uint16   *contractions;
210
  uint16   **sort_order_big;
211 212
  uint16      *tab_to_uni;
  MY_UNI_IDX  *tab_from_uni;
unknown's avatar
unknown committed
213 214
  uchar     *state_map;
  uchar     *ident_map;
215
  uint      strxfrm_multiply;
unknown's avatar
unknown committed
216
  uint      mbminlen;
217
  uint      mbmaxlen;
218 219
  uint16    min_sort_char;
  uint16    max_sort_char; /* For LIKE optimization */
220 221 222
  
  MY_CHARSET_HANDLER *cset;
  MY_COLLATION_HANDLER *coll;
223
  
unknown's avatar
unknown committed
224 225
} CHARSET_INFO;

/* Charset/collation descriptors defined elsewhere. */
extern CHARSET_INFO my_charset_bin;
extern CHARSET_INFO my_charset_big5_chinese_ci;
extern CHARSET_INFO my_charset_big5_bin;
extern CHARSET_INFO my_charset_euckr_korean_ci;
extern CHARSET_INFO my_charset_euckr_bin;
extern CHARSET_INFO my_charset_gb2312_chinese_ci;
extern CHARSET_INFO my_charset_gb2312_bin;
extern CHARSET_INFO my_charset_gbk_chinese_ci;
extern CHARSET_INFO my_charset_gbk_bin;
extern CHARSET_INFO my_charset_latin1;
extern CHARSET_INFO my_charset_latin1_german2_ci;
extern CHARSET_INFO my_charset_latin1_bin;
extern CHARSET_INFO my_charset_latin2_czech_ci;
extern CHARSET_INFO my_charset_sjis_japanese_ci;
extern CHARSET_INFO my_charset_sjis_bin;
extern CHARSET_INFO my_charset_tis620_thai_ci;
extern CHARSET_INFO my_charset_tis620_bin;
extern CHARSET_INFO my_charset_ucs2_general_ci;
extern CHARSET_INFO my_charset_ucs2_bin;
extern CHARSET_INFO my_charset_ucs2_general_uca;
extern CHARSET_INFO my_charset_ujis_japanese_ci;
extern CHARSET_INFO my_charset_ujis_bin;
extern CHARSET_INFO my_charset_utf8_general_ci;
extern CHARSET_INFO my_charset_utf8_bin;
extern CHARSET_INFO my_charset_cp1250_czech_ci;
252

253
/* declarations for simple charsets */
unknown's avatar
unknown committed
254 255 256
extern int  my_strnxfrm_simple(CHARSET_INFO *, uchar *, uint, const uchar *,
			       uint); 
extern int  my_strnncoll_simple(CHARSET_INFO *, const uchar *, uint,
257
				const uchar *, uint, my_bool);
258 259 260

extern int  my_strnncollsp_simple(CHARSET_INFO *, const uchar *, uint,
				const uchar *, uint);
261

262 263 264 265
extern void my_hash_sort_simple(CHARSET_INFO *cs,
				const uchar *key, uint len,
				ulong *nr1, ulong *nr2); 

unknown's avatar
unknown committed
266 267
extern uint my_lengthsp_8bit(CHARSET_INFO *cs, const char *ptr, uint length);

268
extern uint my_instr_simple(struct charset_info_st *,
unknown's avatar
unknown committed
269 270
                            const char *b, uint b_length,
                            const char *s, uint s_length,
271
                            my_match_t *match, uint nmatch);
272

273

274
/* Functions for 8bit */
275 276 277 278
extern void my_caseup_str_8bit(CHARSET_INFO *, char *);
extern void my_casedn_str_8bit(CHARSET_INFO *, char *);
extern void my_caseup_8bit(CHARSET_INFO *, char *, uint);
extern void my_casedn_8bit(CHARSET_INFO *, char *, uint);
279

280
extern int my_strcasecmp_8bit(CHARSET_INFO * cs, const char *, const char *);
281

unknown's avatar
unknown committed
282 283
int my_mb_wc_8bit(CHARSET_INFO *cs,my_wc_t *wc, const uchar *s,const uchar *e);
int my_wc_mb_8bit(CHARSET_INFO *cs,my_wc_t wc, uchar *s, uchar *e);
284

285 286
ulong my_scan_8bit(CHARSET_INFO *cs, const char *b, const char *e, int sq);

/*
  8-bit formatting, number parsing and fill routines.  Parameter lists
  mirror the snprintf, strnto*, long10_to_str, longlong10_to_str and fill
  members of MY_CHARSET_HANDLER.
*/
int my_snprintf_8bit(struct charset_info_st *, char *to, uint n,
                     const char *fmt, ...);

long        my_strntol_8bit(CHARSET_INFO *, const char *s, uint l, int base,
                            char **e, int *err);
ulong      my_strntoul_8bit(CHARSET_INFO *, const char *s, uint l, int base,
                            char **e, int *err);
longlong   my_strntoll_8bit(CHARSET_INFO *, const char *s, uint l, int base,
                            char **e, int *err);
ulonglong my_strntoull_8bit(CHARSET_INFO *, const char *s, uint l, int base,
                            char **e, int *err);
/* NOTE(review): 's' is non-const here, unlike the integer parsers. */
double      my_strntod_8bit(CHARSET_INFO *, char *s, uint l, char **e,
                            int *err);
int  my_long10_to_str_8bit(CHARSET_INFO *, char *to, uint l, int radix,
                           long int val);
int my_longlong10_to_str_8bit(CHARSET_INFO *, char *to, uint l, int radix,
                              longlong val);

void my_fill_8bit(CHARSET_INFO *cs, char* to, uint l, int fill);

307
my_bool  my_like_range_simple(CHARSET_INFO *cs,
unknown's avatar
unknown committed
308
			      const char *ptr, uint ptr_length,
unknown's avatar
unknown committed
309
			      pbool escape, pbool w_one, pbool w_many,
unknown's avatar
unknown committed
310 311 312
			      uint res_length,
			      char *min_str, char *max_str,
			      uint *min_length, uint *max_length);
313

314 315 316 317 318 319 320
my_bool  my_like_range_ucs2(CHARSET_INFO *cs,
			    const char *ptr, uint ptr_length,
			    pbool escape, pbool w_one, pbool w_many,
			    uint res_length,
			    char *min_str, char *max_str,
			    uint *min_length, uint *max_length);

/*
  8-bit wildcard matching and character counting.  Parameter lists mirror
  MY_COLLATION_HANDLER::wildcmp and the numchars/charpos/well_formed_len/
  mbcharlen members of MY_CHARSET_HANDLER.
*/
int my_wildcmp_8bit(CHARSET_INFO *,
                    const char *str, const char *str_end,
                    const char *wildstr, const char *wildend,
                    int escape, int w_one, int w_many);

uint my_numchars_8bit(CHARSET_INFO *, const char *b, const char *e);
uint my_charpos_8bit(CHARSET_INFO *, const char *b, const char *e, uint pos);
uint my_well_formed_len_8bit(CHARSET_INFO *, const char *b, const char *e, uint pos);
int my_mbcharlen_8bit(CHARSET_INFO *, uint c);
331

332

333
/* Functions for multibyte charsets */
334 335 336 337 338
extern void my_caseup_str_mb(CHARSET_INFO *, char *);
extern void my_casedn_str_mb(CHARSET_INFO *, char *);
extern void my_caseup_mb(CHARSET_INFO *, char *, uint);
extern void my_casedn_mb(CHARSET_INFO *, char *, uint);
extern int my_strcasecmp_mb(CHARSET_INFO * cs,const char *, const char *);
339

340
int my_wildcmp_mb(CHARSET_INFO *,
unknown's avatar
unknown committed
341 342 343
		  const char *str,const char *str_end,
		  const char *wildstr,const char *wildend,
		  int escape, int w_one, int w_many);
344 345
uint my_numchars_mb(CHARSET_INFO *, const char *b, const char *e);
uint my_charpos_mb(CHARSET_INFO *, const char *b, const char *e, uint pos);
346
uint my_well_formed_len_mb(CHARSET_INFO *, const char *b, const char *e, uint pos);
347
uint my_instr_mb(struct charset_info_st *,
unknown's avatar
unknown committed
348 349
                 const char *b, uint b_length,
                 const char *s, uint s_length,
350
                 my_match_t *match, uint nmatch);
351

352 353 354 355

extern my_bool my_parse_charset_xml(const char *bug, uint len,
				    int (*add)(CHARSET_INFO *cs));

/*
  Character-class bits (octal) tested by the my_is*() macros below.
  NOTE(review): names beginning with '_' plus an upper-case letter are
  reserved identifiers in ISO C; they are kept because callers use them.
*/
#define _MY_U   01      /* Upper case */
#define _MY_L   02      /* Lower case */
#define _MY_NMR 04      /* Numeral (digit) */
#define _MY_SPC 010     /* Spacing character */
#define _MY_PNT 020     /* Punctuation */
#define _MY_CTR 040     /* Control character */
#define _MY_B   0100    /* Blank */
#define _MY_X   0200    /* heXadecimal digit */


/* Charset-independent helpers: pure bit operations on the value of c. */
#define my_isascii(c)   (!((c) & ~0177))
#define my_toascii(c)   ((c) & 0177)
#define my_tocntrl(c)   ((c) & 31)
#define my_toprint(c)   ((c) | 64)

/*
  Case mapping through the charset's to_upper/to_lower tables, indexed by
  (uchar) c.  The whole expansion is parenthesized so the cast cannot
  bind to surrounding operators.
*/
#define my_toupper(s,c) ((char) ((s)->to_upper[(uchar) (c)]))
#define my_tolower(s,c) ((char) ((s)->to_lower[(uchar) (c)]))

/*
  Classification: each macro masks entry (uchar) c of the table that
  starts one element past (s)->ctype.  The result is the masked bits
  (non-zero when the character is in the class), not a normalized 0/1.
*/
#define my_isalpha(s, c)  (((s)->ctype+1)[(uchar) (c)] & (_MY_U | _MY_L))
#define my_isupper(s, c)  (((s)->ctype+1)[(uchar) (c)] & _MY_U)
#define my_islower(s, c)  (((s)->ctype+1)[(uchar) (c)] & _MY_L)
#define my_isdigit(s, c)  (((s)->ctype+1)[(uchar) (c)] & _MY_NMR)
#define my_isxdigit(s, c) (((s)->ctype+1)[(uchar) (c)] & _MY_X)
#define my_isalnum(s, c)  (((s)->ctype+1)[(uchar) (c)] & (_MY_U | _MY_L | _MY_NMR))
#define my_isspace(s, c)  (((s)->ctype+1)[(uchar) (c)] & _MY_SPC)
#define my_ispunct(s, c)  (((s)->ctype+1)[(uchar) (c)] & _MY_PNT)
#define my_isprint(s, c)  (((s)->ctype+1)[(uchar) (c)] & (_MY_PNT | _MY_U | _MY_L | _MY_NMR | _MY_B))
#define my_isgraph(s, c)  (((s)->ctype+1)[(uchar) (c)] & (_MY_PNT | _MY_U | _MY_L | _MY_NMR))
#define my_iscntrl(s, c)  (((s)->ctype+1)[(uchar) (c)] & _MY_CTR)
/*
  Some macros that should be cleaned up a little.
  my_isvar() and my_isvar_start() evaluate 'c' twice, so do not pass an
  argument with side effects.
*/
#define my_isvar(s,c)                 (my_isalnum(s,c) || (c) == '_')
#define my_isvar_start(s,c)           (my_isalpha(s,c) || (c) == '_')

/* Flag tests on CHARSET_INFO::state; the result is the masked bit. */
#define my_binary_compare(s)          ((s)->state  & MY_CS_BINSORT)
#define use_strnxfrm(s)               ((s)->state  & MY_CS_STRNXFRM)

/*
  Dispatch wrappers: each forwards to the member of the same name in
  (s)->coll, or (cs)->cset for my_charpos, passing the charset as the
  first argument.  my_strnncoll() passes 0 for the callback's trailing
  my_bool argument.  my_charpos() casts b and e to const char *.
*/
#define my_strnxfrm(s, a, b, c, d)    ((s)->coll->strnxfrm((s), (a), (b), (c), (d)))
#define my_strnncoll(s, a, b, c, d) ((s)->coll->strnncoll((s), (a), (b), (c), (d), 0))
#define my_like_range(s, a, b, c, d, e, f, g, h, i, j) \
   ((s)->coll->like_range((s), (a), (b), (c), (d), (e), (f), (g), (h), (i), (j)))
#define my_wildcmp(cs,s,se,w,we,e,o,m) ((cs)->coll->wildcmp((cs),(s),(se),(w),(we),(e),(o),(m)))
#define my_strcasecmp(s, a, b)        ((s)->coll->strcasecmp((s), (a), (b)))
#define my_charpos(cs, b, e, num)     ((cs)->cset->charpos((cs), (const char*) (b), (const char *)(e), (num)))

/* True when the charset handler provides an ismbchar callback. */
#define use_mb(s)                     ((s)->cset->ismbchar != NULL)
#define my_ismbchar(s, a, b)          ((s)->cset->ismbchar((s), (a), (b)))
/*
  With USE_MB defined this dispatches to the handler's mbcharlen callback;
  without it the macro is the constant 1 and its arguments are not
  evaluated.
*/
#ifdef USE_MB
#define my_mbcharlen(s, a)            ((s)->cset->mbcharlen((s),(a)))
#else
#define my_mbcharlen(s, a)            (1)
#endif
/*
  Dispatch wrappers: each forwards to the member of the same name in
  (s)->cset, passing the charset itself as the first argument.
*/
#define my_caseup(s, a, l)            ((s)->cset->caseup((s), (a), (l)))
#define my_casedn(s, a, l)            ((s)->cset->casedn((s), (a), (l)))
#define my_caseup_str(s, a)           ((s)->cset->caseup_str((s), (a)))
#define my_casedn_str(s, a)           ((s)->cset->casedn_str((s), (a)))
#define my_strntol(s, a, b, c, d, e)  ((s)->cset->strntol((s),(a),(b),(c),(d),(e)))
#define my_strntoul(s, a, b, c, d, e) ((s)->cset->strntoul((s),(a),(b),(c),(d),(e)))
#define my_strntoll(s, a, b, c, d, e) ((s)->cset->strntoll((s),(a),(b),(c),(d),(e)))
#define my_strntoull(s, a, b, c,d, e) ((s)->cset->strntoull((s),(a),(b),(c),(d),(e)))
#define my_strntod(s, a, b, c, d)     ((s)->cset->strntod((s),(a),(b),(c),(d)))

/* XXX: still need to take care of this one */
#ifdef MY_CHARSET_TIS620
#error The TIS620 charset is broken at the moment.  Tell tim to fix it.
#define USE_TIS620
#include "t_ctype.h"
#endif

#ifdef	__cplusplus
}
#endif

#endif /* _m_ctype_h */