ctype-bin.c 14.5 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
/* Copyright (C) 2002 MySQL AB & tommy@valley.ne.jp.
   
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public
   License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version.
   
   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.
   
   You should have received a copy of the GNU Library General Public
   License along with this library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
   MA 02111-1307, USA */

/* This file is for binary pseudo charset, created by bar@mysql.com */


#include <my_global.h>
#include "m_string.h"
#include "m_ctype.h"

25 26 27 28 29 30
/*
  Character classification table for the binary pseudo charset.

  The table has 257 entries: one leading 0 followed by one flag byte for
  each byte value 0..255.  Only byte values 0..127 carry flags; the
  entries for 128..255 are all 0.

  NOTE(review): the meaning of the individual flag bits and of the
  leading entry is presumably defined by the ctype macros in m_ctype.h;
  confirm there before changing any value.
*/
static uchar ctype_bin[]=
{
  0,
  32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
  72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
  132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,
  16,129,129,129,129,129,129,  1,  1,  1,  1,  1,  1,  1,  1,  1,
  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, 16, 16, 16, 16, 16,
  16,130,130,130,130,130,130,  2,  2,  2,  2,  2,  2,  2,  2,  2,
  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, 16, 16, 16, 16, 32,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
};


/*
  Dummy array for toupper / tolower / sortorder.

  Identity mapping: entry i holds the value i for every byte 0..255, so
  looking a byte up in this table returns the byte unchanged.
*/

static uchar bin_char_array[] =
{
    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
   64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
   80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
   96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
};

69

70 71 72 73 74 75 76
/*
  Collation init hook for the 8-bit binary collations.

  SYNOPSIS
    my_coll_init_8bit_bin()
    cs			Character set; its max_sort_char is set to 255
    alloc		Allocator callback, not used here

  RETURN
    FALSE		Always
*/

static my_bool
my_coll_init_8bit_bin(CHARSET_INFO *cs,
                      void *(*alloc)(uint) __attribute__((unused)))
{
  /* Every byte value is a valid sort key; 255 is the largest one. */
  cs->max_sort_char= 255;

  return FALSE;
}
unknown's avatar
unknown committed
77

78 79 80 81 82 83 84 85 86 87 88
/*
  Compare two binary strings byte by byte.

  SYNOPSIS
    my_strnncoll_binary()
    cs			Character set (not used)
    s			First string
    slen		Length of 's'
    t			Second string
    tlen		Length of 't'
    t_is_prefix		If set, only the first min(slen,tlen) bytes of 's'
			take part in the length comparison

  RETURN
    The memcmp() result over the common length if it is non-zero,
    otherwise the length difference converted to int.
*/

static int my_strnncoll_binary(CHARSET_INFO * cs __attribute__((unused)),
                               const uchar *s, uint slen,
                               const uchar *t, uint tlen,
                               my_bool t_is_prefix)
{
  uint common_len= (slen < tlen) ? slen : tlen;
  int diff= memcmp(s, t, common_len);

  if (diff)
    return diff;

  /* Bytes are equal over the common part: decide on the lengths. */
  if (t_is_prefix)
    slen= common_len;
  return (int) (slen - tlen);
}


89 90 91 92 93 94 95 96
/*
  Length of a binary string for the "lengthsp" charset hook.

  SYNOPSIS
    my_lengthsp_binary()
    cs			Character set (not used)
    ptr			String (not used)
    length		Length of the string

  RETURN
    'length' unchanged; nothing is trimmed from the string.
*/

uint my_lengthsp_binary(CHARSET_INFO *cs __attribute__((unused)),
                        const char *ptr __attribute__((unused)),
                        uint length)
{
  return length;
}


unknown's avatar
unknown committed
97 98 99 100
/*
  Compare two strings. Result is sign(first_argument - second_argument)

  SYNOPSIS
101
    my_strnncollsp_binary()
unknown's avatar
unknown committed
102 103 104 105 106 107 108
    cs			Chararacter set
    s			String to compare
    slen		Length of 's'
    t			String to compare
    tlen		Length of 't'

  NOTE
109 110
   This function is used for real binary strings, i.e. for
   BLOB, BINARY(N) and VARBINARY(N).
unknown's avatar
unknown committed
111
   It compares trailing spaces as spaces.
unknown's avatar
unknown committed
112 113 114 115 116 117 118

  RETURN
  < 0	s < t
  0	s == t
  > 0	s > t
*/

119 120
static int my_strnncollsp_binary(CHARSET_INFO * cs __attribute__((unused)),
                                 const uchar *s, uint slen,
121 122 123
                                 const uchar *t, uint tlen,
                                 my_bool diff_if_only_endspace_difference
                                 __attribute__((unused)))
124 125 126 127 128 129 130 131 132
{
  return my_strnncoll_binary(cs,s,slen,t,tlen,0);
}


/*
  Compare two strings of an 8-bit character set with binary collation.

  SYNOPSIS
    my_strnncoll_8bit_bin()
    cs			Character set (not used)
    s			First string
    slen		Length of 's'
    t			Second string
    tlen		Length of 't'
    t_is_prefix		If set, only the first min(slen,tlen) bytes of 's'
			take part in the length comparison

  RETURN
    The memcmp() result over the common length if it is non-zero,
    otherwise the length difference converted to int.
*/

static int my_strnncoll_8bit_bin(CHARSET_INFO * cs __attribute__((unused)),
                                 const uchar *s, uint slen,
                                 const uchar *t, uint tlen,
                                 my_bool t_is_prefix)
{
  uint len=min(slen,tlen);
  int cmp= memcmp(s,t,len);
  return cmp ? cmp : (int)((t_is_prefix ? len : slen) - tlen);
}


140 141 142 143 144 145 146 147 148 149
/*
  Compare two strings. Result is sign(first_argument - second_argument)

  SYNOPSIS
    my_strnncollsp_8bit_bin()
    cs			Character set
    a			String to compare
    a_length		Length of 'a'
    b			String to compare
    b_length		Length of 'b'
    diff_if_only_endspace_difference
		        Set to 1 if the strings should be regarded as different
                        if they differ only in end space.  The flag is
                        forced to 0 unless
                        VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
                        is defined.

  NOTE
   This function is used for character strings with binary collations.
   The shorter string is extended with end space to be as long as the longer
   one.

  RETURN
  < 0	a < b
  0	a == b
  > 0	a > b
*/

static int my_strnncollsp_8bit_bin(CHARSET_INFO * cs __attribute__((unused)),
                                   const uchar *a, uint a_length,
                                   const uchar *b, uint b_length,
                                   my_bool diff_if_only_endspace_difference)
{
  const uchar *end;
  uint length;
  int res;

#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
  diff_if_only_endspace_difference= 0;
#endif

  /* Compare the part both strings have in common. */
  end= a + (length= min(a_length, b_length));
  while (a < end)
  {
    if (*a++ != *b++)
      return ((int) a[-1] - (int) b[-1]);
  }
  res= 0;
  if (a_length != b_length)
  {
    int swap= 1;
    /*
      Check the next not space character of the longer key. If it's < ' ',
      then it's smaller than the other key.
    */
    if (diff_if_only_endspace_difference)
      res= 1;                                   /* Assume 'a' is bigger */
    if (a_length < b_length)
    {
      /* 'b' is the longer key: scan its tail through 'a' */
      a_length= b_length;
      a= b;
      swap= -1;                                 /* swap sign of result */
      res= -res;
    }
    /* 'a' now points just past the common part of the longer key. */
    for (end= a + a_length-length; a < end ; a++)
    {
      if (*a != ' ')
        return (*a < ' ') ? -swap : swap;
    }
  }
  return res;
}

211

unknown's avatar
unknown committed
212
/* This function is used for all conversion functions */
213

214
static uint my_case_str_bin(CHARSET_INFO *cs __attribute__((unused)),
unknown's avatar
unknown committed
215
			    char *str __attribute__((unused)))
216
{
217
  return 0;
218 219
}

220 221 222 223 224
/*
  Length-bounded case conversion for the binary charset: a no-op.

  SYNOPSIS
    my_case_bin()
    cs			Character set (not used)
    src			Source buffer (not used)
    srclen		Length of the source
    dst			Destination buffer (not used)
    dstlen		Length of the destination (not used)

  NOTE
    Nothing is copied from 'src' to 'dst'.
    NOTE(review): presumably callers convert in place (dst == src);
    confirm against the callers.

  RETURN
    srclen
*/

static uint my_case_bin(CHARSET_INFO *cs __attribute__((unused)),
                        char *src __attribute__((unused)),
                        uint srclen,
                        char *dst __attribute__((unused)),
                        uint dstlen __attribute__((unused)))
{
  return srclen;
}


/*
  "Case insensitive" compare hook for the binary charset.

  The comparison is a plain strcmp() of two NUL-terminated strings, so
  it is in fact case sensitive.

  RETURN
    The strcmp() result
*/

static int my_strcasecmp_bin(CHARSET_INFO * cs __attribute__((unused)),
                             const char *s, const char *t)
{
  return strcmp(s,t);
}

unknown's avatar
unknown committed
236

237
int my_mbcharlen_8bit(CHARSET_INFO *cs __attribute__((unused)),
unknown's avatar
unknown committed
238
		      uint c __attribute__((unused)))
239 240 241 242
{
  return 1;
}

unknown's avatar
unknown committed
243

244
static int my_mb_wc_bin(CHARSET_INFO *cs __attribute__((unused)),
unknown's avatar
unknown committed
245 246 247
			my_wc_t *wc,
			const unsigned char *str,
			const unsigned char *end __attribute__((unused)))
248
{
249
  if (str >= end)
250
    return MY_CS_TOOSMALL;
251
  
252 253 254 255
  *wc=str[0];
  return 1;
}

unknown's avatar
unknown committed
256

257
static int my_wc_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
unknown's avatar
unknown committed
258 259 260
			my_wc_t wc,
			unsigned char *s,
			unsigned char *e __attribute__((unused)))
261
{
262 263 264
  if (s >= e)
    return MY_CS_TOOSMALL;

unknown's avatar
unknown committed
265
  if (wc < 256)
266
  {
unknown's avatar
unknown committed
267
    s[0]= (char) wc;
268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289
    return 1;
  }
  return MY_CS_ILUNI;
}


/*
  Fold the bytes of a key into a running pair of hash values.

  SYNOPSIS
    my_hash_sort_bin()
    cs			Character set (not used)
    key			Key to hash
    len			Length of the key
    nr1			In/out: hash value, updated once per byte
    nr2			In/out: auxiliary value, incremented by 3 per byte
*/

void my_hash_sort_bin(CHARSET_INFO *cs __attribute__((unused)),
                      const uchar *key, uint len,ulong *nr1, ulong *nr2)
{
  const uchar *key_end= key + len;

  while (key < key_end)
  {
    nr1[0]^= (ulong) ((((uint) nr1[0] & 63) + nr2[0]) * ((uint) *key)) +
             (nr1[0] << 8);
    nr2[0]+= 3;
    key++;
  }
}


unknown's avatar
unknown committed
290 291 292 293 294 295 296 297 298
/*
  The following defines are here to keep the code below identical to
  the one in ctype-simple.c
*/

/* No per-character conversion for binary compare: expands to A itself */
#define likeconv(s,A) (A)
/* Advance pointer A by one; the cs and B arguments are ignored */
#define INC_PTR(cs,A,B) (A)++


299 300 301 302
/*
  Compare a string with a wildcard pattern, byte by byte.

  SYNOPSIS
    my_wildcmp_bin()
    cs			Character set (only passed on to the helper macros)
    str			String to match
    str_end		End of 'str'
    wildstr		Wildcard pattern
    wildend		End of 'wildstr'
    escape		Escape character: the pattern character after it is
			taken literally
    w_one		Pattern character matching exactly one character
    w_many		Pattern character matching any number of characters

  NOTE
    The function calls itself once for every candidate position after a
    w_many followed by a literal character.

  RETURN
    0			The string matches the pattern
    1			No match
    -1			No match; the string ended while the pattern still
			required more characters
*/

int my_wildcmp_bin(CHARSET_INFO *cs,
                   const char *str,const char *str_end,
                   const char *wildstr,const char *wildend,
                   int escape, int w_one, int w_many)
{
  int result= -1;                       /* Not found, using wildcards */

  while (wildstr != wildend)
  {
    /* Match a run of literal pattern characters. */
    while (*wildstr != w_many && *wildstr != w_one)
    {
      if (*wildstr == escape && wildstr+1 != wildend)
        wildstr++;
      if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++))
        return(1);                      /* No match */
      if (wildstr == wildend)
        return(str != str_end);         /* Match if both are at end */
      result=1;                         /* Found an anchor char */
    }
    if (*wildstr == w_one)
    {
      do
      {
        if (str == str_end)             /* Skip one char if possible */
          return(result);
        INC_PTR(cs,str,str_end);
      } while (++wildstr < wildend && *wildstr == w_one);
      if (wildstr == wildend)
        break;
    }
    if (*wildstr == w_many)
    {                                   /* Found w_many */
      uchar cmp;
      wildstr++;
      /* Remove any '%' and '_' from the wild search string */
      for (; wildstr != wildend ; wildstr++)
      {
        if (*wildstr == w_many)
          continue;
        if (*wildstr == w_one)
        {
          if (str == str_end)
            return(-1);
          INC_PTR(cs,str,str_end);
          continue;
        }
        break;                          /* Not a wild character */
      }
      if (wildstr == wildend)
        return(0);                      /* match if w_many is last */
      if (str == str_end)
        return(-1);

      if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
        cmp= *++wildstr;

      INC_PTR(cs,wildstr,wildend);      /* This is compared through cmp */
      cmp=likeconv(cs,cmp);
      do
      {
        /* Find the next occurrence of the literal that follows w_many. */
        while (str != str_end && (uchar) likeconv(cs,*str) != cmp)
          str++;
        if (str++ == str_end)
          return(-1);
        {
          int tmp=my_wildcmp_bin(cs,str,str_end,wildstr,wildend,escape,w_one,
                                 w_many);
          if (tmp <= 0)
            return(tmp);
        }
      } while (str != str_end && wildstr[0] != w_many);
      return(-1);
    }
  }
  return(str != str_end ? 1 : 0);
}

unknown's avatar
unknown committed
376

unknown's avatar
unknown committed
377
static int my_strnxfrm_bin(CHARSET_INFO *cs __attribute__((unused)),
378 379
                           uchar * dest, uint dstlen,
                           const uchar *src, uint srclen)
unknown's avatar
unknown committed
380
{
unknown's avatar
unknown committed
381
  if (dest != src)
382
    memcpy(dest, src, min(dstlen,srclen));
383
  if (dstlen > srclen)
384 385 386 387 388 389 390 391 392 393 394 395 396 397 398
    bfill(dest + srclen, dstlen - srclen, 0);
  return dstlen;
}


/*
  Produce the sort key of a string with an 8-bit binary collation.

  SYNOPSIS
    my_strnxfrm_8bit_bin()
    cs			Character set (not used)
    dest		Destination buffer
    dstlen		Length of the destination buffer
    src			Source string
    srclen		Length of the source string

  NOTE
    min(dstlen,srclen) bytes are copied unless 'dest' and 'src' are the
    same pointer.  If the destination is longer than the source, the
    rest of it is filled with spaces.

  RETURN
    dstlen
*/

static
int my_strnxfrm_8bit_bin(CHARSET_INFO *cs __attribute__((unused)),
                         uchar * dest, uint dstlen,
                         const uchar *src, uint srclen)
{
  if (dest != src)
    memcpy(dest, src, min(dstlen,srclen));
  if (dstlen > srclen)
    bfill(dest + srclen, dstlen - srclen, ' ');
  return dstlen;
}
400

unknown's avatar
unknown committed
401

402
static
403
uint my_instr_bin(CHARSET_INFO *cs __attribute__((unused)),
unknown's avatar
unknown committed
404 405 406
		  const char *b, uint b_length,
		  const char *s, uint s_length,
		  my_match_t *match, uint nmatch)
407 408
{
  register const uchar *str, *search, *end, *search_end;
409

410 411 412
  if (s_length <= b_length)
  {
    if (!s_length)
413 414 415 416 417 418 419 420 421
    {
      if (nmatch)
      {
        match->beg= 0;
        match->end= 0;
        match->mblen= 0;
      }
      return 1;		/* Empty string is always found */
    }
422

unknown's avatar
unknown committed
423 424 425 426
    str= (const uchar*) b;
    search= (const uchar*) s;
    end= (const uchar*) b+b_length-s_length+1;
    search_end= (const uchar*) s + s_length;
427

428
skip:
429 430 431 432 433
    while (str != end)
    {
      if ( (*str++) == (*search))
      {
	register const uchar *i,*j;
434 435

	i= str;
436
	j= search+1;
437

438 439
	while (j != search_end)
	  if ((*i++) != (*j++))
440
            goto skip;
441

442 443 444
        if (nmatch > 0)
	{
	  match[0].beg= 0;
445
	  match[0].end= (uint) (str- (const uchar*)b-1);
446
	  match[0].mblen= match[0].end;
447

448 449 450 451 452 453 454 455
	  if (nmatch > 1)
	  {
	    match[1].beg= match[0].end;
	    match[1].end= match[0].end+s_length;
	    match[1].mblen= match[1].end-match[1].beg;
	  }
	}
	return 2;
456 457 458
      }
    }
  }
459
  return 0;
460 461
}

462

463
MY_COLLATION_HANDLER my_collation_8bit_bin_handler =
464
{
465
    my_coll_init_8bit_bin,
466 467
    my_strnncoll_8bit_bin,
    my_strnncollsp_8bit_bin,
468
    my_strnxfrm_8bit_bin,
469
    my_strnxfrmlen_simple,
470 471 472 473
    my_like_range_simple,
    my_wildcmp_bin,
    my_strcasecmp_bin,
    my_instr_bin,
474 475
    my_hash_sort_bin,
    my_propagate_simple
476 477 478 479
};


static MY_COLLATION_HANDLER my_collation_binary_handler =
480
{
481
    NULL,			/* init */
482
    my_strnncoll_binary,
483
    my_strnncollsp_binary,
484
    my_strnxfrm_bin,
485
    my_strnxfrmlen_simple,
486 487 488
    my_like_range_simple,
    my_wildcmp_bin,
    my_strcasecmp_bin,
489
    my_instr_bin,
490 491
    my_hash_sort_bin,
    my_propagate_simple
492 493
};

unknown's avatar
unknown committed
494

495 496
static MY_CHARSET_HANDLER my_charset_handler=
{
497
    NULL,			/* init */
498
    NULL,			/* ismbchar      */
499
    my_mbcharlen_8bit,		/* mbcharlen     */
500 501
    my_numchars_8bit,
    my_charpos_8bit,
502
    my_well_formed_len_8bit,
503
    my_lengthsp_binary,
504
    my_numcells_8bit,
505 506
    my_mb_wc_bin,
    my_wc_mb_bin,
507
    my_mb_ctype_8bit,
unknown's avatar
unknown committed
508 509 510 511
    my_case_str_bin,
    my_case_str_bin,
    my_case_bin,
    my_case_bin,
512 513 514 515 516 517 518 519 520
    my_snprintf_8bit,
    my_long10_to_str_8bit,
    my_longlong10_to_str_8bit,
    my_fill_8bit,
    my_strntol_8bit,
    my_strntoul_8bit,
    my_strntoll_8bit,
    my_strntoull_8bit,
    my_strntod_8bit,
521
    my_strtoll10_8bit,
522
    my_strntoull10rnd_8bit,
523 524 525
    my_scan_8bit
};

unknown's avatar
unknown committed
526

unknown's avatar
unknown committed
527
CHARSET_INFO my_charset_bin =
528
{
529
    63,0,0,			/* number        */
530
    MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_PRIMARY,/* state */
531
    "binary",			/* cs name    */
532 533
    "binary",			/* name          */
    "",				/* comment       */
534
    NULL,			/* tailoring     */
unknown's avatar
unknown committed
535
    ctype_bin,			/* ctype         */
536 537
    bin_char_array,		/* to_lower      */
    bin_char_array,		/* to_upper      */
538
    NULL,			/* sort_order    */
539 540
    NULL,			/* contractions */
    NULL,			/* sort_order_big*/
541 542
    NULL,			/* tab_to_uni    */
    NULL,			/* tab_from_uni  */
543
    my_unicase_default,         /* caseinfo     */
unknown's avatar
unknown committed
544 545
    NULL,			/* state_map    */
    NULL,			/* ident_map    */
546
    1,				/* strxfrm_multiply */
547 548
    1,                          /* caseup_multiply  */
    1,                          /* casedn_multiply  */
unknown's avatar
unknown committed
549
    1,				/* mbminlen      */
550
    1,				/* mbmaxlen      */
551 552
    0,				/* min_sort_char */
    255,			/* max_sort_char */
553
    0,                          /* pad char      */
554
    0,                          /* escape_with_backslash_is_dangerous */
555
    &my_charset_handler,
556
    &my_collation_binary_handler
557
};