/* Copyright (C) 2002 MySQL AB & tommy@valley.ne.jp.
   
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public
   License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version.
   
   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.
   
   You should have received a copy of the GNU Library General Public
   License along with this library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
   MA 02111-1307, USA */

/* This file is for binary pseudo charset, created by bar@mysql.com */


#include <my_global.h>
#include "m_string.h"
#include "m_ctype.h"

/*
  Character class table of the binary pseudo charset: one leading entry
  followed by 256 entries, written 16 per row.  Each value is a bit mask.
  NOTE(review): the meaning of the individual bits and the way the table is
  indexed are defined outside this file (presumably m_ctype.h) -- confirm
  there.  The row labels below give the byte range each row appears to
  describe, judging from where the digit and letter masks are placed.
*/
static uchar ctype_bin[]=
{
  0,                                                    /* leading entry */
  32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32, /* 0x00 */
  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, /* 0x10 */
  72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, /* 0x20 */
  132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16, /* 0x30 */
  16,129,129,129,129,129,129,  1,  1,  1,  1,  1,  1,  1,  1,  1, /* 0x40 */
  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, 16, 16, 16, 16, 16,  /* 0x50 */
  16,130,130,130,130,130,130,  2,  2,  2,  2,  2,  2,  2,  2,  2, /* 0x60 */
  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, 16, 16, 16, 16, 32,  /* 0x70 */
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0x80 */
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0x90 */
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0xA0 */
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0xB0 */
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0xC0 */
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0xD0 */
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0xE0 */
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0xF0 */
};


/*
  Identity map: entry N holds the value N for every byte 0x00 - 0xFF.
  Used as the to_lower and to_upper table of my_charset_bin, so that
  case conversion through the table never changes a byte.
*/

static uchar bin_char_array[] =
{
  0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
  0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
  0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
  0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
  0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
  0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
  0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
  0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
  0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
  0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
  0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
  0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
  0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
  0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
  0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
  0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
};

/*
  Byte-wise comparison of two binary strings.

  The common leading part is compared with memcmp(); if it is equal the
  lengths decide.  When t_is_prefix is set, only the first 'tlen' bytes of
  's' take part, so an 's' that merely starts with 't' compares as equal.

  RETURN
  < 0	s < t
  0	s == t (or s starts with t, when t_is_prefix is set)
  > 0	s > t
*/
static int my_strnncoll_binary(CHARSET_INFO * cs __attribute__((unused)),
                               const uchar *s, uint slen,
                               const uchar *t, uint tlen,
                               my_bool t_is_prefix)
{
  uint common_len= min(slen, tlen);
  int diff= memcmp(s, t, common_len);

  if (diff)
    return diff;                        /* Differ inside the common part */
  if (t_is_prefix)
    return (int) (common_len - tlen);   /* 0 unless 's' is shorter than 't' */
  return (int) (slen - tlen);
}


/*
  Compare two strings. Result is sign(first_argument - second_argument)

  SYNOPSIS
86
    my_strnncollsp_binary()
unknown's avatar
unknown committed
87 88 89 90 91 92 93
    cs			Chararacter set
    s			String to compare
    slen		Length of 's'
    t			String to compare
    tlen		Length of 't'

  NOTE
94 95
   This function is used for real binary strings, i.e. for
   BLOB, BINARY(N) and VARBINARY(N).
unknown's avatar
unknown committed
96
   It compares trailing spaces as spaces.
unknown's avatar
unknown committed
97 98 99 100 101 102 103

  RETURN
  < 0	s < t
  0	s == t
  > 0	s > t
*/

104 105
static int my_strnncollsp_binary(CHARSET_INFO * cs __attribute__((unused)),
                                 const uchar *s, uint slen,
106 107 108
                                 const uchar *t, uint tlen,
                                 my_bool diff_if_only_endspace_difference
                                 __attribute__((unused)))
109 110 111 112 113 114 115 116 117
{
  return my_strnncoll_binary(cs,s,slen,t,tlen,0);
}


/*
  Byte-wise comparison of two strings for 8bit binary collations.

  Compares the common leading part with memcmp(); on a tie the lengths
  decide.  With t_is_prefix set, 's' is only looked at up to the length of
  't', so a longer 's' that starts with 't' gives 0.

  RETURN
  < 0	s < t
  0	s == t (or s starts with t, when t_is_prefix is set)
  > 0	s > t
*/
static int my_strnncoll_8bit_bin(CHARSET_INFO * cs __attribute__((unused)),
                                 const uchar *s, uint slen,
                                 const uchar *t, uint tlen,
                                 my_bool t_is_prefix)
{
  uint shared= min(slen, tlen);
  int order= memcmp(s, t, shared);

  if (order == 0)
  {
    /* Equal so far: let the (possibly truncated) length of 's' decide */
    uint effective_slen= t_is_prefix ? shared : slen;
    order= (int) (effective_slen - tlen);
  }
  return order;
}


/*
  Compare two strings. Result is sign(first_argument - second_argument)

  SYNOPSIS
    my_strnncollsp_8bit_bin()
    cs			Character set (not used)
    a			First string to compare
    a_length		Length of 'a'
    b			Second string to compare
    b_length		Length of 'b'
    diff_if_only_endspace_difference
		        Set to 1 if the strings should be regarded as different
                        if they differ only in end space.  Forced to 0
                        unless VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
                        is defined.

  NOTE
   This function is used for character strings with binary collations.
   The shorter string is extended with end space to be as long as the longer
   one.

  RETURN
  < 0	a < b
  0	a == b
  > 0	a > b
*/

static int my_strnncollsp_8bit_bin(CHARSET_INFO * cs __attribute__((unused)),
                                   const uchar *a, uint a_length, 
                                   const uchar *b, uint b_length,
                                   my_bool diff_if_only_endspace_difference)
{
  const uchar *end;
  uint length;
  int res;

#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
  diff_if_only_endspace_difference= 0;
#endif

  /* Compare the part that both strings have in common */
  end= a + (length= min(a_length, b_length));
  while (a < end)
  {
    if (*a++ != *b++)
      return ((int) a[-1] - (int) b[-1]);
  }
  res= 0;
  if (a_length != b_length)
  {
    int swap= 1;                        /* Sign multiplier for the result */
    /*
      Check the next not space character of the longer key. If it's < ' ',
      then it's smaller than the other key.
    */
    if (diff_if_only_endspace_difference)
      res= 1;                                   /* Assume 'a' is bigger */
    if (a_length < b_length)
    {
      /* 'b' is the longer key: scan its tail and invert the result */
      a_length= b_length;
      a= b;
      swap= -1;                                 /* swap sign of result */
      res= -res;
    }
    for (end= a + a_length-length; a < end ; a++)
    {
      /*
        Return only the sign.  XOR-ing the byte difference with -1 (as was
        done before) yields 0 for a difference of -1, i.e. a tail byte of
        0x1F in the longer second string made the strings compare as equal.
      */
      if (*a != ' ')
        return (*a < ' ') ? -swap : swap;
    }
  }
  return res;
}

/*
  The functions below are used for all conversion functions;
  they leave the string unchanged.
*/

/*
  Case conversion of a string for the binary charset.
  Does nothing: both arguments are ignored and the string is left as it is.
*/
static void my_case_str_bin(CHARSET_INFO *cs __attribute__((unused)),
                            char *str __attribute__((unused)))
{
  /* Intentionally empty */
}

/*
  Case conversion of a string with explicit length for the binary charset.
  Does nothing: all arguments are ignored and the string is left as it is.
*/
static void my_case_bin(CHARSET_INFO *cs __attribute__((unused)),
                        char *str __attribute__((unused)),
                        uint length __attribute__((unused)))
{
  /* Intentionally empty */
}


/*
  strcasecmp() replacement for the binary charset.
  No case folding is done, so this is a plain strcmp() of the two
  0-terminated strings; the result is whatever strcmp() returns.
*/
static int my_strcasecmp_bin(CHARSET_INFO * cs __attribute__((unused)),
                             const char *s, const char *t)
{
  int order= strcmp(s, t);

  return order;
}

int my_mbcharlen_8bit(CHARSET_INFO *cs __attribute__((unused)),
unknown's avatar
unknown committed
219
		      uint c __attribute__((unused)))
220 221 222 223
{
  return 1;
}

static int my_mb_wc_bin(CHARSET_INFO *cs __attribute__((unused)),
unknown's avatar
unknown committed
226 227 228
			my_wc_t *wc,
			const unsigned char *str,
			const unsigned char *end __attribute__((unused)))
229
{
230 231 232
  if (str >= end)
    return MY_CS_TOOFEW(0);
  
233 234 235 236
  *wc=str[0];
  return 1;
}

static int my_wc_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
unknown's avatar
unknown committed
239 240 241
			my_wc_t wc,
			unsigned char *s,
			unsigned char *e __attribute__((unused)))
242
{
243 244 245
  if (s >= e)
    return MY_CS_TOOSMALL;

unknown's avatar
unknown committed
246
  if (wc < 256)
247
  {
unknown's avatar
unknown committed
248
    s[0]= (char) wc;
249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270
    return 1;
  }
  return MY_CS_ILUNI;
}


/*
  Fold the 'len' bytes starting at 'key' into the running hash state.

  For every byte, in order, *nr1 is XOR-ed with a value built from its own
  low six bits, *nr2, the byte and (*nr1 << 8); *nr2 then grows by 3.
  Both *nr1 and *nr2 are read and updated in place, so the result depends
  on the values they hold on entry.
*/
void my_hash_sort_bin(CHARSET_INFO *cs __attribute__((unused)),
                      const uchar *key, uint len,ulong *nr1, ulong *nr2)
{
  const uchar *key_end= key + len;

  for (; key < key_end; key++)
  {
    ulong mixed= (ulong) ((((uint) *nr1 & 63) + *nr2) * ((uint) *key));

    *nr1^= mixed + (*nr1 << 8);
    *nr2+= 3;
  }
}


/*
  The following defines are here to keep the code below identical to
  the one in ctype-simple.c.
  In this file likeconv() yields the character unchanged and INC_PTR()
  advances the pointer by one; the remaining macro arguments are ignored.
*/

#define likeconv(s,A) (A)
#define INC_PTR(cs,A,B) (A)++


/*
  Match a string against a wildcard pattern, comparing byte by byte.

  SYNOPSIS
    my_wildcmp_bin()
    cs			Character set (only handed to likeconv() / INC_PTR())
    str			String to test
    str_end		End of 'str'
    wildstr		Wildcard pattern
    wildend		End of 'wildstr'
    escape		Escape character of the pattern
    w_one		Wildcard that stands for exactly one character
    w_many		Wildcard that stands for any number of characters

  NOTE
   The function calls itself for the part of the pattern that follows
   a w_many wildcard.

  RETURN
    0	The string matches the pattern
    1	No match
   -1	Not found, using wildcards
*/
static int my_wildcmp_bin(CHARSET_INFO *cs,
                          const char *str,const char *str_end,
                          const char *wildstr,const char *wildend,
                          int escape, int w_one, int w_many)
{
  int fail_code= -1;                    /* Not found, using wildcards */

  while (wildstr != wildend)
  {
    /* Literal part of the pattern: has to match character by character */
    while (*wildstr != w_many && *wildstr != w_one)
    {
      if (*wildstr == escape && wildstr+1 != wildend)
        wildstr++;                      /* Compare the escaped character */
      if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++))
        return 1;                       /* No match */
      if (wildstr == wildend)
        return (str != str_end);        /* Match if both are at end */
      fail_code= 1;                     /* Found an anchor char */
    }

    if (*wildstr == w_one)
    {
      /* Each w_one in a row consumes one character of the string */
      do
      {
        if (str == str_end)             /* Skip one char if possible */
          return fail_code;
        INC_PTR(cs,str,str_end);
      } while (++wildstr < wildend && *wildstr == w_one);
      if (wildstr == wildend)
        break;
    }

    if (*wildstr == w_many)
    {                                   /* Found w_many */
      uchar anchor;

      wildstr++;
      /* Remove any '%' and '_' from the wild search string */
      while (wildstr != wildend)
      {
        if (*wildstr != w_many)
        {
          if (*wildstr != w_one)
            break;                      /* Not a wild character */
          if (str == str_end)
            return -1;
          INC_PTR(cs,str,str_end);
        }
        wildstr++;
      }
      if (wildstr == wildend)
        return 0;                       /* match if w_many is last */
      if (str == str_end)
        return -1;

      /* First literal character after the wildcards */
      if ((anchor= *wildstr) == escape && wildstr+1 != wildend)
        anchor= *++wildstr;

      INC_PTR(cs,wildstr,wildend);      /* This is compared through anchor */
      anchor= likeconv(cs,anchor);
      /*
        NOTE(review): wildstr may be equal to wildend here, in which case
        the loop condition below reads wildstr[0] at the end position --
        confirm that callers always pass a readable byte there.
      */
      do
      {
        int rc;

        /* Find the next place where the anchor character occurs */
        while (str != str_end && (uchar) likeconv(cs,*str) != anchor)
          str++;
        if (str++ == str_end)
          return -1;

        /* Try to match the rest of the pattern from here */
        rc= my_wildcmp_bin(cs,str,str_end,wildstr,wildend,escape,w_one,
                           w_many);
        if (rc <= 0)
          return rc;
      } while (str != str_end && wildstr[0] != w_many);
      return -1;
    }
  }
  return (str != str_end ? 1 : 0);
}

/*
  Make a sort key for a binary string: the key is the string itself.

  When 'dest' and 'src' differ, min(len, srclen) bytes are copied and that
  number is returned.  When they are the same pointer nothing is copied
  and 'len' is returned as given.
  NOTE(review): in the in-place case the result is not limited to
  'srclen' -- confirm that callers expect this.
*/
static int my_strnxfrm_bin(CHARSET_INFO *cs __attribute__((unused)),
                           uchar * dest, uint len,
                           const uchar *src, 
                           uint srclen __attribute__((unused)))
{
  if (dest == src)
    return len;                         /* In place: nothing to copy */

  len= min(len, srclen);
  memcpy(dest, src, len);
  return len;
}
static
370
uint my_instr_bin(CHARSET_INFO *cs __attribute__((unused)),
unknown's avatar
unknown committed
371 372 373
		  const char *b, uint b_length,
		  const char *s, uint s_length,
		  my_match_t *match, uint nmatch)
374 375
{
  register const uchar *str, *search, *end, *search_end;
376

377 378 379
  if (s_length <= b_length)
  {
    if (!s_length)
380 381 382 383 384 385 386 387 388
    {
      if (nmatch)
      {
        match->beg= 0;
        match->end= 0;
        match->mblen= 0;
      }
      return 1;		/* Empty string is always found */
    }
389

unknown's avatar
unknown committed
390 391 392 393
    str= (const uchar*) b;
    search= (const uchar*) s;
    end= (const uchar*) b+b_length-s_length+1;
    search_end= (const uchar*) s + s_length;
394

395
skip:
396 397 398 399 400
    while (str != end)
    {
      if ( (*str++) == (*search))
      {
	register const uchar *i,*j;
401 402

	i= str;
403
	j= search+1;
404

405 406
	while (j != search_end)
	  if ((*i++) != (*j++))
407
            goto skip;
408

409 410 411
        if (nmatch > 0)
	{
	  match[0].beg= 0;
unknown's avatar
unknown committed
412
	  match[0].end= str- (const uchar*)b-1;
413
	  match[0].mblen= match[0].end;
414

415 416 417 418 419 420 421 422
	  if (nmatch > 1)
	  {
	    match[1].beg= match[0].end;
	    match[1].end= match[0].end+s_length;
	    match[1].mblen= match[1].end-match[1].beg;
	  }
	}
	return 2;
423 424 425
      }
    }
  }
426
  return 0;
427 428
}

MY_COLLATION_HANDLER my_collation_8bit_bin_handler =
431 432 433 434 435 436 437 438 439 440 441 442 443 444
{
    NULL,			/* init */
    my_strnncoll_8bit_bin,
    my_strnncollsp_8bit_bin,
    my_strnxfrm_bin,
    my_like_range_simple,
    my_wildcmp_bin,
    my_strcasecmp_bin,
    my_instr_bin,
    my_hash_sort_bin
};


static MY_COLLATION_HANDLER my_collation_binary_handler =
445
{
446
    NULL,			/* init */
447
    my_strnncoll_binary,
448
    my_strnncollsp_binary,
449 450 451 452
    my_strnxfrm_bin,
    my_like_range_simple,
    my_wildcmp_bin,
    my_strcasecmp_bin,
453
    my_instr_bin,
454 455 456
    my_hash_sort_bin
};

static MY_CHARSET_HANDLER my_charset_handler=
{
460
    NULL,			/* init */
461
    NULL,			/* ismbchar      */
462
    my_mbcharlen_8bit,		/* mbcharlen     */
463 464
    my_numchars_8bit,
    my_charpos_8bit,
465
    my_well_formed_len_8bit,
unknown's avatar
unknown committed
466
    my_lengthsp_8bit,
467
    my_numcells_8bit,
468 469
    my_mb_wc_bin,
    my_wc_mb_bin,
unknown's avatar
unknown committed
470 471 472 473
    my_case_str_bin,
    my_case_str_bin,
    my_case_bin,
    my_case_bin,
474 475 476 477 478 479 480 481 482
    my_snprintf_8bit,
    my_long10_to_str_8bit,
    my_longlong10_to_str_8bit,
    my_fill_8bit,
    my_strntol_8bit,
    my_strntoul_8bit,
    my_strntoll_8bit,
    my_strntoull_8bit,
    my_strntod_8bit,
483
    my_strtoll10_8bit,
484 485 486
    my_scan_8bit
};

CHARSET_INFO my_charset_bin =
489
{
490
    63,0,0,			/* number        */
unknown's avatar
unknown committed
491
    MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_PRIMARY,/* state        */
492
    "binary",			/* cs name    */
493 494
    "binary",			/* name          */
    "",				/* comment       */
495
    NULL,			/* tailoring     */
unknown's avatar
unknown committed
496
    ctype_bin,			/* ctype         */
497 498
    bin_char_array,		/* to_lower      */
    bin_char_array,		/* to_upper      */
499
    NULL,			/* sort_order    */
500 501
    NULL,			/* contractions */
    NULL,			/* sort_order_big*/
502 503
    NULL,			/* tab_to_uni    */
    NULL,			/* tab_from_uni  */
unknown's avatar
unknown committed
504 505
    NULL,			/* state_map    */
    NULL,			/* ident_map    */
506
    1,				/* strxfrm_multiply */
unknown's avatar
unknown committed
507
    1,				/* mbminlen      */
508
    1,				/* mbmaxlen      */
509 510
    0,				/* min_sort_char */
    255,			/* max_sort_char */
511
    &my_charset_handler,
512
    &my_collation_binary_handler
513
};