ctype-simple.c 27.2 KB
Newer Older
1
/* Copyright (C) 2002 MySQL AB
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

#include <my_global.h>
18
#include "m_string.h"
unknown's avatar
unknown committed
19
#include "m_ctype.h"
unknown's avatar
unknown committed
20 21
#include <errno.h>

22
#include "stdarg.h"
23

24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
/*
  Converts a string into its sort key.
  
  SYNOPSIS
     my_strnxfrm_xxx()
     
  IMPLEMENTATION
     
     The my_strnxfrm_xxx() function transforms a string pointed to by
     'src' with length 'srclen' according to the charset+collation 
     pair 'cs' and copies the result key into 'dest'.
     
     Comparing two strings using memcmp() after my_strnxfrm_xxx()
     is equal to comparing two original strings with my_strnncollsp_xxx().
     
     Not more than 'dstlen' bytes are written into 'dst'.
     To guarantee that the whole string is transformed, 'dstlen' must be
     at least srclen*cs->strnxfrm_multiply bytes long. Otherwise, a
     subsequent memcmp() may return an inaccurate result.
     
     If the source string is too short to fill whole 'dstlen' bytes,
     then the 'dest' string is padded up to 'dstlen', ensuring that:
     
       "a"  == "a "
       "a\0" < "a"
       "a\0" < "a "
     
     my_strnxfrm_simple() is implemented for 8bit charsets and
     simple collations with one-to-one string->key transformation.
     
     See also implementations for various charsets/collations in  
     other ctype-xxx.c files.
     
  RETURN
  
    Target len 'dstlen'.
  
*/

unknown's avatar
unknown committed
63

64
int my_strnxfrm_simple(CHARSET_INFO * cs, 
65 66
                       uchar *dest, uint len,
                       const uchar *src, uint srclen)
67
{
68
  uchar *map= cs->sort_order;
69
  uint dstlen= len;
70
  set_if_smaller(len, srclen);
unknown's avatar
unknown committed
71 72 73 74 75 76 77 78 79 80 81 82
  if (dest != src)
  {
    const uchar *end;
    for ( end=src+len; src < end ;  )
      *dest++= map[*src++];
  }
  else
  {
    const uchar *end;
    for ( end=dest+len; dest < end ; dest++)
      *dest= (char) map[(uchar) *dest];
  }
83 84 85
  if (dstlen > len)
    bfill(dest, dstlen - len, ' ');
  return dstlen;
86 87
}

88
/*
  Compare two strings byte by byte using the sort weights of the collation.

  SYNOPSIS
    my_strnncoll_simple()
    cs           Character set handler; cs->sort_order gives the weights
    s, slen      First string and its length
    t, tlen      Second string and its length
    t_is_prefix  If set and 's' is longer than 't', only the first 'tlen'
                 bytes of 's' take part in the length comparison

  RETURN
    Difference of the first pair of differing weights, or the difference
    of the (possibly adjusted) lengths if the common part is equal.
*/
int my_strnncoll_simple(CHARSET_INFO * cs, const uchar *s, uint slen,
                        const uchar *t, uint tlen,
                        my_bool t_is_prefix)
{
  const uchar *weights= cs->sort_order;
  int remaining= (slen > tlen) ? tlen : slen;

  if (t_is_prefix && slen > tlen)
    slen= tlen;

  for ( ; remaining-- ; s++, t++)
  {
    int diff= (int) weights[*s] - (int) weights[*t];
    if (diff)
      return diff;
  }
  return (int) (slen - tlen);
}

104

105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
/*
  Compare strings, discarding end space

  SYNOPSIS
    my_strnncollsp_simple()
    cs			character set handler
    a			First string to compare
    a_length		Length of 'a'
    b			Second string to compare
    b_length		Length of 'b'

  IMPLEMENTATION
    If one string is shorter than the other, then we space extend the
    shorter one so that the strings have equal length.

    This will ensure that the following things hold:

    "a"  == "a "
    "a\0" < "a"
    "a\0" < "a "

  RETURN
    < 0	 a <  b
    = 0	 a == b
    > 0	 a > b
*/

int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *a, uint a_length,
                          const uchar *b, uint b_length)
{
  const uchar *map= cs->sort_order, *end;
  uint length;

  /* Compare the part that both strings have in common by sort weight */
  end= a + (length= min(a_length, b_length));
  while (a < end)
  {
    if (map[*a++] != map[*b++])
      return ((int) map[a[-1]] - (int) map[b[-1]]);
  }
  if (a_length != b_length)
  {
    int swap= 1;
    /*
      The common part is equal, so the result is decided by the tail of
      the longer string, which is compared against the spaces that
      virtually extend the shorter one.
    */
    if (a_length < b_length)
    {
      /* Let 'a' refer to the longer string */
      a_length= b_length;
      a= b;
      swap= -1;                                 /* swap sign of result */
    }
    for (end= a + a_length-length; a < end ; a++)
    {
      /*
        Use the sort weight here as well, as the loop above does. The
        weight is compared with a plain ' ' because that is the byte
        my_strnxfrm_simple() pads its keys with.
      */
      if (map[*a] != ' ')
        return (map[*a] < ' ') ? -swap : swap;
    }
  }
  return 0;
}

167

168 169
/*
  Convert a \0 terminated string to upper case in place, using the
  cs->to_upper table. Conversion stops at the first byte whose mapped
  value is 0.
*/
void my_caseup_str_8bit(CHARSET_INFO * cs,char *str)
{
  const uchar *upper= cs->to_upper;

  for (;;)
  {
    *str= (char) upper[(uchar) *str];
    if (*str == 0)
      break;
    str++;
  }
}

/*
  Convert a \0 terminated string to lower case in place, using the
  cs->to_lower table. Conversion stops at the first byte whose mapped
  value is 0.
*/
void my_casedn_str_8bit(CHARSET_INFO * cs,char *str)
{
  const uchar *lower= cs->to_lower;

  for (;;)
  {
    *str= (char) lower[(uchar) *str];
    if (*str == 0)
      break;
    str++;
  }
}

/*
  Convert the first 'length' bytes of 'str' to upper case in place,
  using the cs->to_upper table.
*/
void my_caseup_8bit(CHARSET_INFO * cs, char *str, uint length)
{
  const uchar *upper= cs->to_upper;
  uint i;

  for (i= 0; i < length; i++)
    str[i]= (char) upper[(uchar) str[i]];
}

/*
  Convert the first 'length' bytes of 'str' to lower case in place,
  using the cs->to_lower table.
*/
void my_casedn_8bit(CHARSET_INFO * cs, char *str, uint length)
{
  const uchar *lower= cs->to_lower;
  uint i;

  for (i= 0; i < length; i++)
    str[i]= (char) lower[(uchar) str[i]];
}

/*
  Compare two \0 terminated strings after mapping every byte through
  cs->to_upper.

  RETURN
    0 if the strings are equal, otherwise the difference between the
    mapped values of the first pair of bytes that differ.
*/
int my_strcasecmp_8bit(CHARSET_INFO * cs,const char *s, const char *t)
{
  const uchar *upper= cs->to_upper;

  for ( ; ; s++, t++)
  {
    int diff= (int) upper[(uchar) *s] - (int) upper[(uchar) *t];
    if (diff)
      return diff;
    if (*s == 0)                        /* Both strings ended together */
      return 0;
  }
}


205 206 207 208
/*
  Convert one byte of an 8-bit character set to a wide character through
  the cs->tab_to_uni table.

  RETURN
    MY_CS_TOOSMALL  if there is no input byte (str >= end)
    -1              if a non-zero byte maps to 0 in the table
    1               otherwise; one byte was converted into *wc
*/
int my_mb_wc_8bit(CHARSET_INFO *cs,my_wc_t *wc,
                  const unsigned char *str,
                  const unsigned char *end __attribute__((unused)))
{
  my_wc_t code;

  if (str >= end)
    return MY_CS_TOOSMALL;

  code= cs->tab_to_uni[*str];
  *wc= code;
  if (code == 0 && *str != 0)
    return -1;
  return 1;
}

/*
  Convert a wide character to one byte of an 8-bit character set.

  The cs->tab_from_uni array is searched for the first entry whose
  [from, to] range contains 'wc'; the array ends with an entry that has
  no table.

  RETURN
    MY_CS_TOOSMALL  if there is no room for the result (str >= end)
    MY_CS_ILUNI     if no range contains 'wc', or a non-zero 'wc' maps to 0
    1               otherwise; one byte was stored in str[0]
*/
int my_wc_mb_8bit(CHARSET_INFO *cs,my_wc_t wc,
                  unsigned char *str,
                  unsigned char *end __attribute__((unused)))
{
  MY_UNI_IDX *range;

  if (str >= end)
    return MY_CS_TOOSMALL;

  for (range= cs->tab_from_uni; range->tab; range++)
  {
    if (wc < range->from || wc > range->to)
      continue;                         /* Not in this range */

    str[0]= range->tab[wc - range->from];
    if (str[0] == 0 && wc != 0)
      return MY_CS_ILUNI;
    return 1;
  }
  return MY_CS_ILUNI;
}
235 236


237 238 239 240 241 242
/* 
   We can't use vsprintf here as it's not guaranteed to return
   the length on all operating systems.
   This function is also not called in a safe environment, so the
   end buffer must be checked.
*/
243 244

int my_snprintf_8bit(CHARSET_INFO *cs  __attribute__((unused)),
                     char* to, uint n  __attribute__((unused)),
                     const char* fmt, ...)
{
  va_list ap;
  int written;

  /* Hand the variable arguments over to my_vsnprintf() */
  va_start(ap, fmt);
  written= my_vsnprintf(to, n, fmt, ap);
  va_end(ap);

  return written;
}


257
void my_hash_sort_simple(CHARSET_INFO *cs,
258 259
			 const uchar *key, uint len,
			 ulong *nr1, ulong *nr2)
260 261 262 263 264 265 266 267 268 269 270 271 272
{
  register uchar *sort_order=cs->sort_order;
  const uchar *pos = key;
  
  key+= len;
  
  for (; pos < (uchar*) key ; pos++)
  {
    nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * 
	     ((uint) sort_order[(uint) *pos])) + (nr1[0] << 8);
    nr2[0]+=3;
  }
}
273

unknown's avatar
unknown committed
274

unknown's avatar
unknown committed
275 276 277
/*
  Convert a string of a given length to a long in the 32-bit range.

  SYNOPSIS
    my_strntol_8bit()
    cs      Character set; used to recognise leading white space
    nptr    String to convert; it does not have to be \0 terminated
    l       Length of nptr in bytes
    base    Radix of the number. It is used as given: it is neither
            validated nor derived from a "0x" or "0" prefix
    endptr  If not NULL: set to the byte after the last digit used, or
            to nptr if no conversion was done
    err     Set to 0 on success, ERANGE on overflow and EDOM if no
            digits were found

  RETURN
    The converted value; INT_MIN32 or INT_MAX32 on overflow; 0 if no
    conversion was done.
*/
long my_strntol_8bit(CHARSET_INFO *cs,
                     const char *nptr, uint l, int base,
                     char **endptr, int *err)
{
  int negative= 0;
  int overflow= 0;
  register uint32 cutoff;
  register unsigned int cutlim;
  register uint32 i= 0;
  register const char *s= nptr;
  const char *e= nptr + l;
  const char *save;

  *err= 0;                              /* Initialize error indicator */

  /* Skip leading white space */
  for ( ; s < e && my_isspace(cs, *s) ; s++);

  if (s == e)
    goto noconv;

  /* Check for a sign. */
  if (*s == '-')
  {
    negative= 1;
    ++s;
  }
  else if (*s == '+')
    ++s;

  save= s;
  cutoff= ((uint32)~0L) / (uint32) base;
  cutlim= (uint) (((uint32)~0L) % (uint32) base);

  /*
    The byte is fetched inside the loop body, after the 's != e' test,
    so that nothing at or after nptr+l is ever read.
  */
  for ( ; s != e; s++)
  {
    register unsigned char c= *s;

    if (c>='0' && c<='9')
      c-= '0';
    else if (c>='A' && c<='Z')
      c= c - 'A' + 10;
    else if (c>='a' && c<='z')
      c= c - 'a' + 10;
    else
      break;
    if (c >= base)
      break;
    if (i > cutoff || (i == cutoff && c > cutlim))
      overflow= 1;                      /* Go on to find the end of number */
    else
    {
      i*= (uint32) base;
      i+= c;
    }
  }

  if (s == save)
    goto noconv;

  if (endptr != NULL)
    *endptr= (char *) s;

  if (negative)
  {
    if (i > (uint32) INT_MIN32)
      overflow= 1;
  }
  else if (i > INT_MAX32)
    overflow= 1;

  if (overflow)
  {
    err[0]= ERANGE;
    return negative ? INT_MIN32 : INT_MAX32;
  }

  if (!negative)
    return (long) i;

  /* Negating (long) 2^31 would overflow where long is 32 bits wide */
  return (i == (uint32) INT_MIN32) ? (long) INT_MIN32 : -((long) i);

noconv:
  err[0]= EDOM;
  if (endptr != NULL)
    *endptr= (char *) nptr;
  return 0L;
}

unknown's avatar
unknown committed
397

unknown's avatar
unknown committed
398 399 400
/*
  Convert a string of a given length to an unsigned long in the 32-bit
  range.

  SYNOPSIS
    my_strntoul_8bit()
    cs      Character set; used to recognise leading white space
    nptr    String to convert; it does not have to be \0 terminated
    l       Length of nptr in bytes
    base    Radix of the number. It is used as given: it is neither
            validated nor derived from a "0x" or "0" prefix
    endptr  If not NULL: set to the byte after the last digit used, or
            to nptr if no conversion was done
    err     Set to 0 on success, ERANGE on overflow and EDOM if no
            digits were found

  RETURN
    The converted value, negated in unsigned arithmetic if the number
    had a '-' sign; ~(uint32) 0 on overflow; 0 if no conversion was done.
*/
ulong my_strntoul_8bit(CHARSET_INFO *cs,
                       const char *nptr, uint l, int base,
                       char **endptr, int *err)
{
  int negative= 0;
  int overflow= 0;
  register uint32 cutoff;
  register unsigned int cutlim;
  register uint32 i= 0;
  register const char *s= nptr;
  const char *e= nptr + l;
  const char *save;

  *err= 0;                              /* Initialize error indicator */

  /* Skip leading white space */
  for ( ; s < e && my_isspace(cs, *s) ; s++);

  if (s == e)
    goto noconv;

  /* Check for a sign. */
  if (*s == '-')
  {
    negative= 1;
    ++s;
  }
  else if (*s == '+')
    ++s;

  save= s;
  cutoff= ((uint32)~0L) / (uint32) base;
  cutlim= (uint) (((uint32)~0L) % (uint32) base);

  /*
    The byte is fetched inside the loop body, after the 's != e' test,
    so that nothing at or after nptr+l is ever read.
  */
  for ( ; s != e; s++)
  {
    register unsigned char c= *s;

    if (c>='0' && c<='9')
      c-= '0';
    else if (c>='A' && c<='Z')
      c= c - 'A' + 10;
    else if (c>='a' && c<='z')
      c= c - 'a' + 10;
    else
      break;
    if (c >= base)
      break;
    if (i > cutoff || (i == cutoff && c > cutlim))
      overflow= 1;                      /* Go on to find the end of number */
    else
    {
      i*= (uint32) base;
      i+= c;
    }
  }

  if (s == save)
    goto noconv;

  if (endptr != NULL)
    *endptr= (char *) s;

  if (overflow)
  {
    err[0]= ERANGE;
    return (~(uint32) 0);
  }

  /*
    Negate in unsigned arithmetic. This gives the same value as negating
    a signed long and converting it back, without a possible signed
    overflow.
  */
  return negative ? (ulong) 0 - (ulong) i : (ulong) i;

noconv:
  err[0]= EDOM;
  if (endptr != NULL)
    *endptr= (char *) nptr;
  return 0L;
}

unknown's avatar
unknown committed
511

unknown's avatar
unknown committed
512 513 514
/*
  Convert a string of a given length to a longlong.

  SYNOPSIS
    my_strntoll_8bit()
    cs      Character set; passed to my_isspace() to recognise leading
            white space
    nptr    String to convert; it does not have to be \0 terminated
    l       Length of nptr in bytes
    base    Radix of the number. It is used as given: it is neither
            validated nor derived from a "0x" or "0" prefix
    endptr  If not NULL: set to the byte after the last digit used, or
            to nptr if no conversion was done
    err     Set to 0 on success, ERANGE on overflow and EDOM if no
            digits were found

  RETURN
    The converted value; LONGLONG_MIN or LONGLONG_MAX on overflow; 0 if
    no conversion was done.
*/
longlong my_strntoll_8bit(CHARSET_INFO *cs __attribute__((unused)),
                          const char *nptr, uint l, int base,
                          char **endptr,int *err)
{
  int negative= 0;
  int overflow= 0;
  register ulonglong cutoff;
  register unsigned int cutlim;
  register ulonglong i= 0;
  register const char *s= nptr, *e= nptr + l;
  const char *save;

  *err= 0;                              /* Initialize error indicator */

  /* Skip leading white space */
  for ( ; s < e && my_isspace(cs,*s) ; s++);

  if (s == e)
    goto noconv;

  /* Check for a sign. */
  if (*s == '-')
  {
    negative= 1;
    ++s;
  }
  else if (*s == '+')
    ++s;

  save= s;

  cutoff= (~(ulonglong) 0) / (unsigned long int) base;
  cutlim= (uint) ((~(ulonglong) 0) % (unsigned long int) base);

  for ( ; s != e; s++)
  {
    register unsigned char c= *s;

    if (c>='0' && c<='9')
      c-= '0';
    else if (c>='A' && c<='Z')
      c= c - 'A' + 10;
    else if (c>='a' && c<='z')
      c= c - 'a' + 10;
    else
      break;
    if (c >= base)
      break;
    if (i > cutoff || (i == cutoff && c > cutlim))
      overflow= 1;                      /* Go on to find the end of number */
    else
    {
      i*= (ulonglong) base;
      i+= c;
    }
  }

  if (s == save)
    goto noconv;

  if (endptr != NULL)
    *endptr= (char *) s;

  if (negative)
  {
    if (i > (ulonglong) LONGLONG_MIN)
      overflow= 1;
  }
  else if (i > (ulonglong) LONGLONG_MAX)
    overflow= 1;

  if (overflow)
  {
    err[0]= ERANGE;
    return negative ? LONGLONG_MIN : LONGLONG_MAX;
  }

  if (!negative)
    return (longlong) i;

  /*
    The magnitude of LONGLONG_MIN cannot be represented as a positive
    longlong, so it must not be negated as a signed value.
  */
  return (i == (ulonglong) LONGLONG_MIN) ? LONGLONG_MIN : -((longlong) i);

noconv:
  err[0]= EDOM;
  if (endptr != NULL)
    *endptr= (char *) nptr;
  return 0L;
}

unknown's avatar
unknown committed
634 635

/*
  Convert a string of a given length to an unsigned longlong.

  SYNOPSIS
    my_strntoull_8bit()
    cs      Character set; used to recognise leading white space
    nptr    String to convert; it does not have to be \0 terminated
    l       Length of nptr in bytes
    base    Radix of the number. It is used as given: it is neither
            validated nor derived from a "0x" or "0" prefix
    endptr  If not NULL: set to the byte after the last digit used, or
            to nptr if no conversion was done
    err     Set to 0 on success, ERANGE on overflow and EDOM if no
            digits were found

  RETURN
    The converted value, negated in unsigned arithmetic if the number
    had a '-' sign; ~(ulonglong) 0 on overflow; 0 if no conversion was
    done.
*/
ulonglong my_strntoull_8bit(CHARSET_INFO *cs,
                            const char *nptr, uint l, int base,
                            char **endptr, int *err)
{
  int negative= 0;
  int overflow= 0;
  register ulonglong cutoff;
  register unsigned int cutlim;
  register ulonglong i= 0;
  register const char *s= nptr, *e= nptr + l;
  const char *save;

  *err= 0;                              /* Initialize error indicator */

  /* Skip leading white space */
  for ( ; s < e && my_isspace(cs,*s) ; s++);

  if (s == e)
    goto noconv;

  /* Check for a sign. */
  if (*s == '-')
  {
    negative= 1;
    ++s;
  }
  else if (*s == '+')
    ++s;

  save= s;

  cutoff= (~(ulonglong) 0) / (unsigned long int) base;
  cutlim= (uint) ((~(ulonglong) 0) % (unsigned long int) base);

  for ( ; s != e; s++)
  {
    register unsigned char c= *s;

    if (c>='0' && c<='9')
      c-= '0';
    else if (c>='A' && c<='Z')
      c= c - 'A' + 10;
    else if (c>='a' && c<='z')
      c= c - 'a' + 10;
    else
      break;
    if (c >= base)
      break;
    if (i > cutoff || (i == cutoff && c > cutlim))
      overflow= 1;                      /* Go on to find the end of number */
    else
    {
      i*= (ulonglong) base;
      i+= c;
    }
  }

  if (s == save)
    goto noconv;

  if (endptr != NULL)
    *endptr= (char *) s;

  if (overflow)
  {
    err[0]= ERANGE;
    return (~(ulonglong) 0);
  }

  /*
    Negate in unsigned arithmetic. This gives the same value as negating
    a signed longlong and converting it back, without a possible signed
    overflow for values of 2^63 and above.
  */
  return negative ? (ulonglong) 0 - i : i;

noconv:
  err[0]= EDOM;
  if (endptr != NULL)
    *endptr= (char *) nptr;
  return 0L;
}

750 751 752 753 754 755 756 757
/*
  Read double from string

  SYNOPSIS:
    my_strntod_8bit()
    cs		Character set information
    str		String to convert to double
    length	Optional length for string.
758 759
    end		result pointer to end of converted string
    err		Error number if failed conversion
760 761 762 763 764 765 766 767 768
    
  NOTES:
    If length is not INT_MAX32 or str[length] != 0 then the given str must
    be writeable
    If length == INT_MAX32 the str must be \0 terminated.

    It's implemented this way to save a buffer allocation and a memory copy.

  RETURN
769
    Value of number in string
770 771 772 773
*/


double my_strntod_8bit(CHARSET_INFO *cs __attribute__((unused)),
                       char *str, uint length,
                       char **end, int *err)
{
  /* A length of INT_MAX32 is replaced by a limit that should be big enough */
  uint limit= (length == INT_MAX32) ? 65535 : length;

  /* my_strtod() gets the end of the input in *end and updates it */
  *end= str + limit;
  return my_strtod(str, end, err);
}
782 783


784 785
/*
  This is a fast version optimized for the case of radix 10 / -10
786 787

  Assume len >= 1
788 789
*/

unknown's avatar
unknown committed
790
int my_long10_to_str_8bit(CHARSET_INFO *cs __attribute__((unused)),
                     char *dst, uint len, int radix, long int val)
{
  char buffer[66];
  register char *p, *e;
  long int new_val;
  uint sign= 0;

  /*
    Nothing fits into an empty buffer. Without this check a negative
    value would store the '-' past the end of 'dst' and wrap 'len' around.
  */
  if (len == 0)
    return 0;

  /* The digits are produced backwards, starting at the end of 'buffer' */
  e= p= &buffer[sizeof(buffer)-1];
  *p= 0;

  /* A negative radix means that 'val' is signed */
  if (radix < 0 && val < 0)
  {
    /* Negate as unsigned so that the smallest long is handled too */
    val= -(unsigned long int) val;
    *dst++= '-';
    len--;
    sign= 1;
  }

  /* The first digit is taken with unsigned arithmetic */
  new_val= (long) ((unsigned long int) val / 10);
  *--p= '0' + (char) ((unsigned long int) val - (unsigned long) new_val * 10);
  val= new_val;

  while (val != 0)
  {
    new_val= val / 10;
    *--p= '0' + (char) (val - new_val * 10);
    val= new_val;
  }

  /* Copy as many digits as fit; no end \0 is stored in 'dst' */
  len= min(len, (uint) (e-p));
  memcpy(dst, p, len);
  return (int) len+sign;
}
827

828

unknown's avatar
unknown committed
829
/*
  Convert a longlong to a decimal string.

  SYNOPSIS
    my_longlong10_to_str_8bit()
    cs      Not used
    dst     Result buffer; no end \0 is stored
    len     Size of dst in bytes
    radix   If negative, 'val' is taken as signed and a '-' is stored for
            negative values; otherwise 'val' is taken as unsigned
    val     Value to convert

  RETURN
    Number of bytes stored in dst, including the sign.
*/
int my_longlong10_to_str_8bit(CHARSET_INFO *cs __attribute__((unused)),
                      char *dst, uint len, int radix, longlong val)
{
  char buffer[65];
  register char *p, *e;
  long long_val;
  uint sign= 0;

  /*
    Nothing fits into an empty buffer. Without this check a negative
    value would store the '-' past the end of 'dst' and wrap 'len'
    around, and a zero value would store one byte anyway.
  */
  if (len == 0)
    return 0;

  if (radix < 0 && val < 0)
  {
    /* Negate as unsigned so that the smallest longlong is handled too */
    val= -(ulonglong) val;
    *dst++= '-';
    len--;
    sign= 1;
  }

  /* The digits are produced backwards, starting at the end of 'buffer' */
  e= p= &buffer[sizeof(buffer)-1];
  *p= 0;

  if (val == 0)
  {
    *--p= '0';
    len= 1;
    goto cnv;
  }

  /* Use ulonglong arithmetic only as long as the value needs it */
  while ((ulonglong) val > (ulonglong) LONG_MAX)
  {
    ulonglong quo= (ulonglong) val / (uint) 10;
    uint rem= (uint) (val - quo * (uint) 10);
    *--p= '0' + rem;
    val= quo;
  }

  long_val= (long) val;
  while (long_val != 0)
  {
    long quo= long_val / 10;
    *--p= (char) ('0' + (long_val - quo * 10));
    long_val= quo;
  }

  /* Copy as many digits as fit */
  len= min(len, (uint) (e-p));
cnv:
  memcpy(dst, p, len);
  return len+sign;
}


881 882 883 884 885 886 887 888 889 890 891 892 893
/*
** Compare string against string with wildcard
**	0 if matched
**	-1 if not matched with wildcard
**	 1 if not matched
*/

#ifdef LIKE_CMP_TOUPPER
#define likeconv(s,A) (uchar) my_toupper(s,A)
#else
#define likeconv(s,A) (uchar) (s)->sort_order[(uchar) (A)]
#endif

unknown's avatar
unknown committed
894
#define INC_PTR(cs,A,B) (A)++
895 896 897 898 899 900 901


int my_wildcmp_8bit(CHARSET_INFO *cs,
		    const char *str,const char *str_end,
		    const char *wildstr,const char *wildend,
		    int escape, int w_one, int w_many)
{
unknown's avatar
unknown committed
902
  int result= -1;			/* Not found, using wildcards */
903 904 905 906 907 908 909 910 911

  while (wildstr != wildend)
  {
    while (*wildstr != w_many && *wildstr != w_one)
    {
      if (*wildstr == escape && wildstr+1 != wildend)
	wildstr++;

      if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++))
unknown's avatar
unknown committed
912
	return(1);				/* No match */
913
      if (wildstr == wildend)
unknown's avatar
unknown committed
914
	return(str != str_end);		/* Match if both are at end */
unknown's avatar
unknown committed
915
      result=1;					/* Found an anchor char     */
916 917 918 919 920
    }
    if (*wildstr == w_one)
    {
      do
      {
unknown's avatar
unknown committed
921
	if (str == str_end)			/* Skip one char if possible */
unknown's avatar
unknown committed
922
	  return(result);
923 924 925 926 927 928
	INC_PTR(cs,str,str_end);
      } while (++wildstr < wildend && *wildstr == w_one);
      if (wildstr == wildend)
	break;
    }
    if (*wildstr == w_many)
unknown's avatar
unknown committed
929
    {						/* Found w_many */
930 931 932 933 934 935 936 937 938 939 940
      uchar cmp;
      
      wildstr++;
      /* Remove any '%' and '_' from the wild search string */
      for (; wildstr != wildend ; wildstr++)
      {
	if (*wildstr == w_many)
	  continue;
	if (*wildstr == w_one)
	{
	  if (str == str_end)
unknown's avatar
unknown committed
941
	    return(-1);
942 943 944
	  INC_PTR(cs,str,str_end);
	  continue;
	}
unknown's avatar
unknown committed
945
	break;					/* Not a wild character */
946 947
      }
      if (wildstr == wildend)
unknown's avatar
unknown committed
948
	return(0);				/* Ok if w_many is last */
949
      if (str == str_end)
unknown's avatar
unknown committed
950
	return(-1);
951 952 953 954
      
      if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
	cmp= *++wildstr;

unknown's avatar
unknown committed
955 956
      INC_PTR(cs,wildstr,wildend);	/* This is compared trough cmp */
      cmp=likeconv(cs,cmp);
957 958
      do
      {
unknown's avatar
unknown committed
959 960 961
	while (str != str_end && (uchar) likeconv(cs,*str) != cmp)
	  str++;
	if (str++ == str_end) return(-1);
962
	{
unknown's avatar
unknown committed
963 964
	  int tmp=my_wildcmp_8bit(cs,str,str_end,wildstr,wildend,escape,w_one,
				  w_many);
965
	  if (tmp <= 0)
unknown's avatar
unknown committed
966
	    return(tmp);
967 968 969 970 971
	}
      } while (str != str_end && wildstr[0] != w_many);
      return(-1);
    }
  }
unknown's avatar
unknown committed
972
  return(str != str_end ? 1 : 0);
973
}
974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993


/*
** Calculate min_str and max_str that ranges a LIKE string.
** Arguments:
** ptr		Pointer to LIKE string.
** ptr_length	Length of LIKE string.
** escape	Escape character in LIKE.  (Normally '\').
**		All escape characters should be removed from min_str and max_str
** res_length	Length of min_str and max_str.
** min_str	Smallest case sensitive string that ranges LIKE.
**		Should be space padded to res_length.
** max_str	Largest case sensitive string that ranges LIKE.
**		Normally padded with the biggest character sort value.
**
** The function should return 0 if ok and 1 if the LIKE string can't be
** optimized !
*/

my_bool my_like_range_simple(CHARSET_INFO *cs,
                             const char *ptr,uint ptr_length,
                             pbool escape, pbool w_one, pbool w_many,
                             uint res_length,
                             char *min_str,char *max_str,
                             uint *min_length,uint *max_length)
{
  const char *pattern_end= ptr + ptr_length;
  char *min_start= min_str;
  char *min_limit= min_str + res_length;
  uint chars_left= res_length / cs->mbmaxlen;

  while (ptr != pattern_end && min_str != min_limit && chars_left > 0)
  {
    if (*ptr == escape && ptr + 1 != pattern_end)
    {
      ptr++;                                    /* Skip escape */
      *min_str++= *ptr;
      *max_str++= *ptr;
    }
    else if (*ptr == w_one)                     /* '_' in SQL */
    {
      *min_str++= '\0';                         /* This should be min char */
      *max_str++= (char) cs->max_sort_char;
    }
    else if (*ptr == w_many)                    /* '%' in SQL */
    {
      /* Anything may follow: fill both strings up to res_length */
      *min_length= (uint) (min_str - min_start);
      *max_length= res_length;
      do
      {
        *min_str++= 0;
        *max_str++= (char) cs->max_sort_char;
      } while (min_str != min_limit);
      return 0;
    }
    else
    {
      *min_str++= *ptr;
      *max_str++= *ptr;
    }
    ptr++;
    chars_left--;
  }

  *min_length= *max_length= (uint) (min_str - min_start);

  /* Space pad both strings, because of key compression */
  while (min_str != min_limit)
  {
    *min_str++= ' ';
    *max_str++= ' ';
  }
  return 0;
}
1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048


/*
  Find the length of a sequence of a given kind at the start of a string.

  SYNOPSIS
    my_scan_8bit()
    cs      Character set; used to recognise white space
    str     Start of the string
    end     End of the string
    sq      Kind of sequence:
            MY_SEQ_INTTAIL  a '.' followed by any number of '0'
            MY_SEQ_SPACES   white space characters

  RETURN
    Length of the sequence in bytes. 0 if the string does not start with
    such a sequence or if 'sq' is not one of the kinds above.
*/
ulong my_scan_8bit(CHARSET_INFO *cs, const char *str, const char *end, int sq)
{
  const char *str0= str;
  switch (sq)
  {
  case MY_SEQ_INTTAIL:
    /*
      Check for the end before looking at the first byte. With an empty
      string we would otherwise read past 'end', and if that byte was a
      '.' the loop below would never reach 'end'.
    */
    if (str < end && *str == '.')
    {
      for (str++ ; str != end && *str == '0' ; str++);
      return (ulong) (str - str0);
    }
    return 0;

  case MY_SEQ_SPACES:
    for ( ; str < end ; str++)
    {
      if (!my_isspace(cs,*str))
        break;
    }
    return (ulong) (str - str0);
  default:
    return 0;
  }
}
1064

1065

1066 1067 1068 1069
/*
  Fill a buffer with one byte value.

  SYNOPSIS
    my_fill_8bit()
    cs    Not used
    s     Buffer to fill
    l     Number of bytes to write
    fill  Value to write; handed unchanged to bfill()
*/
void my_fill_8bit(CHARSET_INFO *cs __attribute__((unused)),
                  char *s, uint l, int fill)
{
  bfill(s, l, fill);
}

1072

1073 1074 1075
/*
  Character count of the range [b, e).

  SYNOPSIS
    my_numchars_8bit()
    cs    Not used
    b     Start of the string
    e     End of the string (one past the last byte)

  RETURN
    The byte distance e - b.
*/
uint my_numchars_8bit(CHARSET_INFO *cs __attribute__((unused)),
                      const char *b, const char *e)
{
  return (uint) (e - b);
}

1079

1080 1081 1082
/*
  Cell count of the range [b, e).

  SYNOPSIS
    my_numcells_8bit()
    cs    Not used
    b     Start of the string
    e     End of the string (one past the last byte)

  RETURN
    The byte distance e - b.
*/
uint my_numcells_8bit(CHARSET_INFO *cs __attribute__((unused)),
                      const char *b, const char *e)
{
  return (uint) (e - b);
}


1087 1088 1089 1090 1091 1092 1093
/*
  Position lookup for the 8-bit handler.

  SYNOPSIS
    my_charpos_8bit()
    cs    Not used
    b     Not used
    e     Not used
    pos   Requested position

  RETURN
    'pos', unchanged. The string bounds are never consulted, so no
    range check is made against [b, e).
*/
uint my_charpos_8bit(CHARSET_INFO *cs __attribute__((unused)),
		     const char *b  __attribute__((unused)),
		     const char *e  __attribute__((unused)),
		     uint pos)
{
  return pos;
}
1094

1095 1096

/*
  Length of the leading part of [start, end) limited to 'nchars'.

  SYNOPSIS
    my_well_formed_len_8bit()
    cs      Not used
    start   Start of the string
    end     End of the string (one past the last byte)
    nchars  Upper limit for the result
    error   OUT: always set to 0

  RETURN
    The smaller of (end - start) and nchars.
*/
uint my_well_formed_len_8bit(CHARSET_INFO *cs __attribute__((unused)),
                             const char *start, const char *end,
                             uint nchars, int *error)
{
  uint available= (uint) (end - start);

  *error= 0;
  if (nchars < available)
    return nchars;
  return available;
}

1105

unknown's avatar
unknown committed
1106 1107 1108 1109 1110 1111 1112 1113 1114 1115
/*
  Length of a string with trailing spaces ignored.

  SYNOPSIS
    my_lengthsp_8bit()
    cs      Not used
    ptr     Start of the string
    length  Number of bytes in the string

  RETURN
    Length of the string after dropping every trailing ' ' byte;
    0 if the string is empty or consists of spaces only.
*/
uint my_lengthsp_8bit(CHARSET_INFO *cs __attribute__((unused)),
                      const char *ptr, uint length)
{
  uint kept= length;

  while (kept > 0 && ptr[kept - 1] == ' ')
    kept--;
  return kept;
}


1116
uint my_instr_simple(CHARSET_INFO *cs,
unknown's avatar
unknown committed
1117 1118
                    const char *b, uint b_length, 
		    const char *s, uint s_length,
1119
		    my_match_t *match, uint nmatch)
1120 1121 1122 1123 1124 1125
{
  register const uchar *str, *search, *end, *search_end;
  
  if (s_length <= b_length)
  {
    if (!s_length)
1126 1127 1128 1129 1130 1131 1132 1133 1134
    {
      if (nmatch)
      {
        match->beg= 0;
        match->end= 0;
        match->mblen= 0;
      }
      return 1;		/* Empty string is always found */
    }
1135
    
unknown's avatar
unknown committed
1136 1137 1138 1139
    str= (const uchar*) b;
    search= (const uchar*) s;
    end= (const uchar*) b+b_length-s_length+1;
    search_end= (const uchar*) s + s_length;
1140
    
1141
skip:
1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152
    while (str != end)
    {
      if (cs->sort_order[*str++] == cs->sort_order[*search])
      {
	register const uchar *i,*j;
	
	i= str; 
	j= search+1;
	
	while (j != search_end)
	  if (cs->sort_order[*i++] != cs->sort_order[*j++]) 
1153
            goto skip;
1154
        
1155 1156 1157
	if (nmatch > 0)
	{
	  match[0].beg= 0;
unknown's avatar
unknown committed
1158
	  match[0].end= str- (const uchar*)b-1;
1159 1160 1161 1162 1163 1164 1165 1166 1167 1168
	  match[0].mblen= match[0].end;
	  
	  if (nmatch > 1)
	  {
	    match[1].beg= match[0].end;
	    match[1].end= match[0].end+s_length;
	    match[1].mblen= match[1].end-match[1].beg;
	  }
	}
	return 2;
1169 1170 1171
      }
    }
  }
1172
  return 0;
1173 1174 1175
}


1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201
/*
  Per-plane bookkeeping used while building the Unicode -> 8-bit
  reverse mapping: 'nchars' counts the characters that fall into the
  plane, 'uidx' holds the code point range seen (from/to) and the
  lookup table allocated for it.
*/
typedef struct
{
  int		nchars;
  MY_UNI_IDX	uidx;
} uni_idx;

/* Upper bound (exclusive) for the 8-bit character codes that are scanned */
#define PLANE_SIZE	0x100
/* Number of planes, i.e. number of uni_idx slots */
#define PLANE_NUM	0x100
/* Plane of code point x: its bits above the low byte, modulo PLANE_NUM */
#define PLANE_NUMBER(x)	(((x)>>8) % PLANE_NUM)

/*
  qsort() comparator for uni_idx entries.

  SYNOPSIS
    pcmp()
    f   First entry  (const uni_idx*)
    s   Second entry (const uni_idx*)

  NOTES
    Entries are ordered by descending 'nchars'; entries with equal
    'nchars' are ordered by ascending 'uidx.from'.

    The tie-break compares the same field on both sides. Comparing
    'from' of one entry against 'to' of the other makes an entry
    compare unequal to itself whenever its range holds more than one
    code point, which violates the consistency qsort() requires from
    its comparison function.

  RETURN
    <0, 0 or >0 if 'f' sorts before, equal to or after 's'.
*/
static int pcmp(const void * f, const void * s)
{
  const uni_idx *F= (const uni_idx*) f;
  const uni_idx *S= (const uni_idx*) s;
  int res;

  if (!(res=((S->nchars)-(F->nchars))))
    res=((F->uidx.from)-(S->uidx.from));
  return res;
}

/*
  Build cs->tab_from_uni, the reverse (Unicode -> 8-bit) mapping,
  from cs->tab_to_uni.

  SYNOPSIS
    create_fromuni()
    cs     Character set; tab_to_uni is read, tab_from_uni is written
    alloc  Allocator used for the per-plane tables and the final array

  IMPLEMENTATION
    1. Collect, for every plane, the number of characters mapped into
       it and the lowest/highest code point seen.
    2. Sort the planes with pcmp().
    3. For every non-empty plane allocate a table covering its
       from..to range and store the 8-bit code of each character in it.
    4. Copy the non-empty planes into a newly allocated array that is
       terminated by a zero-filled MY_UNI_IDX element.

  RETURN
    FALSE  Success
    TRUE   cs->tab_to_uni is not loaded, or an allocation failed
*/
static my_bool create_fromuni(CHARSET_INFO *cs, void *(*alloc)(uint))
{
  uni_idx  planes[PLANE_NUM];
  uni_idx  *plane;
  int      code, used, k;

  /*
    Check that Unicode map is loaded.
    It can be not loaded when the collation is
    listed in Index.xml but not specified
    in the character set specific XML file.
  */
  if (!cs->tab_to_uni)
    return TRUE;

  /* Clear plane statistics */
  bzero(planes, sizeof(planes));

  /* Count number of characters in each plane */
  for (code= 0; code < 0x100; code++)
  {
    uint16 wc= cs->tab_to_uni[code];

    /* A zero code point counts only for character code 0 itself */
    if (!wc && code)
      continue;

    plane= &planes[PLANE_NUMBER(wc)];
    if (!plane->nchars)
    {
      plane->uidx.from= wc;
      plane->uidx.to= wc;
    }
    else
    {
      if (wc < plane->uidx.from)
        plane->uidx.from= wc;
      if (wc > plane->uidx.to)
        plane->uidx.to= wc;
    }
    plane->nchars++;
  }

  /* Sort planes in descending order */
  qsort(planes, PLANE_NUM, sizeof(uni_idx), &pcmp);

  /* Allocate and fill the table of every non-empty plane */
  for (used= 0; used < PLANE_NUM; used++)
  {
    int numchars;

    plane= &planes[used];

    /* Empty planes are sorted last, so the first one ends the scan */
    if (!plane->nchars)
      break;

    numchars= plane->uidx.to - plane->uidx.from + 1;
    plane->uidx.tab= (uchar*) alloc(numchars * sizeof(*plane->uidx.tab));
    if (!plane->uidx.tab)
      return TRUE;

    bzero(plane->uidx.tab, numchars * sizeof(*plane->uidx.tab));

    for (code= 1; code < PLANE_SIZE; code++)
    {
      uint16 wc= cs->tab_to_uni[code];
      if (wc && wc >= plane->uidx.from && wc <= plane->uidx.to)
        plane->uidx.tab[wc - plane->uidx.from]= code;
    }
  }

  /* Allocate the reverse table: one slot per used plane plus a marker */
  cs->tab_from_uni= (MY_UNI_IDX*) alloc(sizeof(MY_UNI_IDX) * (used + 1));
  if (!cs->tab_from_uni)
    return TRUE;

  for (k= 0; k < used; k++)
    cs->tab_from_uni[k]= planes[k].uidx;

  /* Set end-of-list marker */
  bzero(&cs->tab_from_uni[used], sizeof(MY_UNI_IDX));
  return FALSE;
}

/*
  Character set initialization for the 8-bit handler.

  SYNOPSIS
    my_cset_init_8bit()
    cs     Character set to initialize
    alloc  Allocator, forwarded to create_fromuni()

  RETURN
    The result of create_fromuni(): FALSE on success, TRUE on failure.
*/
static my_bool my_cset_init_8bit(CHARSET_INFO *cs, void *(*alloc)(uint))
{
  return create_fromuni(cs, alloc);
}

1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306
/*
  Point cs->max_sort_char at the character with the highest weight.

  SYNOPSIS
    set_max_sort_char()
    cs   Collation; sort_order is read, max_sort_char is updated

  NOTES
    The search starts from the weight of the current max_sort_char and
    replaces it only by a strictly heavier character, so among equally
    heavy characters the first one met wins. Nothing is done when the
    collation has no sort_order table.
*/
static void set_max_sort_char(CHARSET_INFO *cs)
{
  uchar heaviest;
  uint  code;

  if (cs->sort_order)
  {
    heaviest= cs->sort_order[(uchar) cs->max_sort_char];
    for (code= 0; code < 256; code++)
    {
      uchar weight= (uchar) cs->sort_order[code];

      if (weight <= heaviest)
        continue;
      heaviest= weight;
      cs->max_sort_char= code;
    }
  }
}

/*
  Collation initialization for simple collations.

  SYNOPSIS
    my_coll_init_simple()
    cs     Collation to initialize; updated by set_max_sort_char()
    alloc  Not used

  RETURN
    FALSE, always.
*/
static my_bool my_coll_init_simple(CHARSET_INFO *cs,
                                   void *(*alloc)(uint) __attribute__((unused)))
{
  set_max_sort_char(cs);
  return FALSE;
}

1307

1308 1309 1310
/*
  String to longlong conversion for the 8-bit handler.

  SYNOPSIS
    my_strtoll10_8bit()
    cs      Not used
    nptr    Forwarded to my_strtoll10()
    endptr  Forwarded to my_strtoll10()
    error   Forwarded to my_strtoll10()

  RETURN
    Whatever my_strtoll10() returns for the same arguments.
*/
longlong my_strtoll10_8bit(CHARSET_INFO *cs __attribute__((unused)),
                           const char *nptr, char **endptr, int *error)
{
  return my_strtoll10(nptr, endptr, error);
}

1314

1315 1316
MY_CHARSET_HANDLER my_charset_8bit_handler=
{
1317
    my_cset_init_8bit,
1318
    NULL,			/* ismbchar      */
1319
    my_mbcharlen_8bit,		/* mbcharlen     */
1320 1321
    my_numchars_8bit,
    my_charpos_8bit,
1322
    my_well_formed_len_8bit,
unknown's avatar
unknown committed
1323
    my_lengthsp_8bit,
1324
    my_numcells_8bit,
1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339
    my_mb_wc_8bit,
    my_wc_mb_8bit,
    my_caseup_str_8bit,
    my_casedn_str_8bit,
    my_caseup_8bit,
    my_casedn_8bit,
    my_snprintf_8bit,
    my_long10_to_str_8bit,
    my_longlong10_to_str_8bit,
    my_fill_8bit,
    my_strntol_8bit,
    my_strntoul_8bit,
    my_strntoll_8bit,
    my_strntoull_8bit,
    my_strntod_8bit,
1340
    my_strtoll10_8bit,
1341 1342 1343 1344 1345
    my_scan_8bit
};

MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler =
{
1346
    my_coll_init_simple,	/* init */
1347 1348 1349 1350 1351 1352
    my_strnncoll_simple,
    my_strnncollsp_simple,
    my_strnxfrm_simple,
    my_like_range_simple,
    my_wildcmp_8bit,
    my_strcasecmp_8bit,
1353
    my_instr_simple,
1354 1355
    my_hash_sort_simple
};