sql_string.cc 30.9 KB
Newer Older
Sergei Golubchik's avatar
Sergei Golubchik committed
1
/* Copyright (c) 2000, 2013, Oracle and/or its affiliates.
unknown's avatar
unknown committed
2 3 4

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
unknown's avatar
unknown committed
5
   the Free Software Foundation; version 2 of the License.
unknown's avatar
unknown committed
6 7

   This program is distributed in the hope that it will be useful,
unknown's avatar
unknown committed
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
unknown's avatar
unknown committed
9 10 11 12 13
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
14
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
unknown's avatar
unknown committed
15 16 17

/* This file is originally from the mysql distribution. Coded by monty */

18
#ifdef USE_PRAGMA_IMPLEMENTATION
unknown's avatar
unknown committed
19 20 21
#pragma implementation				// gcc: Class implementation
#endif

22
#include <my_global.h>
unknown's avatar
unknown committed
23 24 25
#include <my_sys.h>
#include <m_string.h>
#include <m_ctype.h>
26
#include <mysql_com.h>
unknown's avatar
unknown committed
27 28 29 30 31 32 33

#include "sql_string.h"

/*****************************************************************************
** String functions
*****************************************************************************/

34
/*
  Make sure the buffer can hold 'length' bytes plus a terminating zero and
  reset the string to empty.

  The current buffer is released through free() and a new one is obtained
  with my_malloc() only when the existing allocation is smaller than the
  aligned request.

  RETURN
    FALSE  ok
    TRUE   size overflow or allocation failure
*/
bool String::real_alloc(uint32 length)
{
  const uint32 wanted= ALIGN_SIZE(length + 1);
  DBUG_ASSERT(wanted > length);
  if (wanted <= length)
    return TRUE;                                 /* Overflow */

  str_length= 0;
  if (Alloced_length < wanted)
  {
    free();
    Ptr= (char*) my_malloc(wanted, MYF(MY_WME |
                                       (thread_specific ?
                                        MY_THREAD_SPECIFIC : 0)));
    if (!Ptr)
      return TRUE;
    Alloced_length= wanted;
    alloced= 1;
  }
  Ptr[0]= 0;                                     /* Empty, zero-terminated */
  return FALSE;
}


56 57 58 59 60 61 62 63 64 65 66
/**
   Allocates a new buffer on the heap for this String.

   - If the String's internal buffer is privately owned and heap allocated,
     one of the following is performed.

     - If the requested length is greater than what fits in the buffer, a new
       buffer is allocated, data moved and the old buffer freed.

     - If the requested length is less or equal to what fits in the buffer, a
       null character is inserted at the appropriate position.
unknown's avatar
unknown committed
67

68 69 70 71 72
   - If the String does not keep a private buffer on the heap, such a buffer
     will be allocated and the string copied according to its length, as found
     in String::length().
 
   For C compatibility, the new string buffer is null terminated.
unknown's avatar
unknown committed
73

74 75 76 77 78 79 80 81 82
   @param alloc_length The requested string size in characters, excluding any
   null terminator.

   @retval false Either the copy operation is complete or, if the size of the
   new buffer is smaller than the currently allocated buffer (if one exists),
   no allocation occurred.

   @retval true An error occurred when attempting to allocate memory.
*/
83
bool String::realloc_raw(uint32 alloc_length)
{
  /* The current allocation is already strictly larger: nothing to do. */
  if (Alloced_length > alloc_length)
    return FALSE;

  const uint32 len= ALIGN_SIZE(alloc_length + 1);
  DBUG_ASSERT(len > alloc_length);
  if (len <= alloc_length)
    return TRUE;                                 /* Overflow */

  char *new_ptr;
  if (alloced)
  {
    /* We own the buffer: let my_realloc() grow it. */
    new_ptr= (char*) my_realloc(Ptr, len,
                                MYF(MY_WME |
                                    (thread_specific ?
                                     MY_THREAD_SPECIFIC : 0)));
    if (!new_ptr)
      return TRUE;                               /* Signal error */
  }
  else
  {
    /* The buffer is not ours: allocate a private one and copy the data. */
    new_ptr= (char*) my_malloc(len,
                               MYF(MY_WME |
                                   (thread_specific ?
                                    MY_THREAD_SPECIFIC : 0)));
    if (!new_ptr)
      return TRUE;                               /* Signal error */
    if (str_length > len - 1)
      str_length= 0;
    if (str_length)                              /* Avoid bugs in memcpy on AIX */
      memcpy(new_ptr, Ptr, str_length);
    new_ptr[str_length]= 0;
    alloced= 1;
  }

  Ptr= new_ptr;
  Alloced_length= len;
  return FALSE;
}

120
/*
  Store the decimal text of an integer in this string.

  num            Value to convert
  unsigned_flag  If set, 'num' is converted with base 10, otherwise with
                 base -10 (the values passed to longlong10_to_str)
  cs             Character set used for the conversion and stored as the
                 string's character set

  RETURN
    FALSE  ok
    TRUE   alloc() failed
*/
bool String::set_int(longlong num, bool unsigned_flag, CHARSET_INFO *cs)
{
  const uint buf_size= 20 * cs->mbmaxlen + 1;

  if (alloc(buf_size))
    return TRUE;

  const int radix= unsigned_flag ? 10 : -10;
  str_length= (uint32) (cs->cset->longlong10_to_str)(cs, Ptr, buf_size,
                                                     radix, num);
  str_charset= cs;
  return FALSE;
}

132
bool String::set_real(double num,uint decimals, CHARSET_INFO *cs)
unknown's avatar
unknown committed
133
{
134
  char buff[FLOATING_POINT_BUFFER];
135
  uint dummy_errors;
136
  size_t len;
137 138

  str_charset=cs;
unknown's avatar
unknown committed
139 140
  if (decimals >= NOT_FIXED_DEC)
  {
141
    len= my_gcvt(num, MY_GCVT_ARG_DOUBLE, sizeof(buff) - 1, buff, NULL);
142
    return copy(buff, len, &my_charset_latin1, cs, &dummy_errors);
unknown's avatar
unknown committed
143
  }
144 145
  len= my_fcvt(num, decimals, buff, NULL);
  return copy(buff, (uint32) len, &my_charset_latin1, cs,
146
              &dummy_errors);
unknown's avatar
unknown committed
147 148 149 150 151 152 153 154 155 156 157 158 159
}


/*
  Make sure the string owns its buffer.

  When the buffer is not marked as allocated by this object, Alloced_length
  is zeroed so that realloc() is forced to create a private buffer.

  RETURN
    FALSE  ok (or buffer already owned)
    TRUE   realloc() failed
*/
bool String::copy()
{
  if (alloced)
    return FALSE;

  Alloced_length= 0;                            // Force realloc
  return realloc(str_length);
}

160 161 162 163 164 165 166 167 168 169 170
/**
   Copies the internal buffer from str. If this String has a private heap
   allocated buffer where new data does not fit, a new buffer is allocated
   before copying and the old buffer freed. Character set information is also
   copied.
   
   @param str The string whose internal buffer is to be copied.
   
   @retval false Success.
   @retval true Memory allocation failed.
*/
unknown's avatar
unknown committed
171 172 173 174 175 176 177
bool String::copy(const String &str)
{
  if (alloc(str.str_length))
    return TRUE;
  str_length=str.str_length;
  bmove(Ptr,str.Ptr,str_length);		// May be overlapping
  Ptr[str_length]=0;
178
  str_charset=str.str_charset;
unknown's avatar
unknown committed
179 180 181
  return FALSE;
}

182
bool String::copy(const char *str,uint32 arg_length, CHARSET_INFO *cs)
unknown's avatar
unknown committed
183 184 185
{
  if (alloc(arg_length))
    return TRUE;
186 187
  if ((str_length=arg_length))
    memcpy(Ptr,str,arg_length);
unknown's avatar
unknown committed
188
  Ptr[arg_length]=0;
189
  str_charset=cs;
unknown's avatar
unknown committed
190 191 192
  return FALSE;
}

193 194

/*
unknown's avatar
unknown committed
195 196 197 198 199 200 201 202 203 204 205 206 207 208 209
  Checks that the source string can be just copied to the destination string
  without conversion.

  SYNOPSIS

  needs_conversion()
  arg_length		Length of string to copy.
  from_cs		Character set to copy from
  to_cs			Character set to copy to
  uint32 *offset	Returns number of unaligned characters.

  RETURN
   0  No conversion needed
   1  Either character set conversion or adding leading  zeros
      (e.g. for UCS-2) must be done
210 211 212 213

  NOTE
  to_cs may be NULL for "no conversion" if the system variable
  character_set_results is NULL.
214
*/
unknown's avatar
unknown committed
215 216 217 218 219

bool String::needs_conversion(uint32 arg_length,
                              CHARSET_INFO *from_cs,
                              CHARSET_INFO *to_cs,
                              uint32 *offset)
{
  *offset= 0;

  /* No target charset, binary target or identical charsets: plain copy. */
  if (!to_cs || (to_cs == &my_charset_bin) || (to_cs == from_cs))
    return FALSE;
  if (my_charset_same(from_cs, to_cs))
    return FALSE;

  /* A non-binary source in a different charset must be converted. */
  if (from_cs != &my_charset_bin)
    return TRUE;

  /*
    Binary source: it can be copied as is only when its length is a
    multiple of the target's minimum character length. Otherwise *offset
    reports the number of left-over bytes.
  */
  *offset= arg_length % to_cs->mbminlen;
  return *offset != 0;
}

unknown's avatar
unknown committed
232

233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250
/*
  Checks that the source string can just be copied to the destination string
  without conversion.
  Unlike needs_conversion it will require conversion on incoming binary data
  to ensure the data are verified for validity first.

  @param arg_length   Length of string to copy.
  @param from_cs      Character set to copy from
  @param to_cs        Character set to copy to

  @return conversion needed
*/
bool String::needs_conversion_on_storage(uint32 arg_length,
                                         CHARSET_INFO *cs_from,
                                         CHARSET_INFO *cs_to)
{
  uint32 offset;

  if (needs_conversion(arg_length, cs_from, cs_to, &offset))
    return TRUE;

  /* Only a binary string stored into a non-binary destination is special */
  if (cs_from != &my_charset_bin || cs_to == &my_charset_bin)
    return FALSE;

  /* Force conversion when any of the following is true: */
  return
    /* it's a variable length encoding */
    cs_to->mbminlen != cs_to->mbmaxlen ||
    /* longer than 2 bytes : neither 1 byte nor ucs2 */
    cs_to->mbminlen > 2 ||
    /* and is not a multiple of the char byte size */
    0 != (arg_length % cs_to->mbmaxlen);
}


unknown's avatar
unknown committed
269
/*
unknown's avatar
unknown committed
270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291
  Copy a string in a multi-byte character set, adding leading zeros.

  SYNOPSIS

  copy_aligned()
  str			String to copy
  arg_length		Length of string. This should NOT be divisible by
			cs->mbminlen.
  offset		arg_length % cs->mbminlen
  cs			Character set for 'str'

  NOTES
    For real multi-byte, ascii incompatible character sets,
    like UCS-2, add leading zeros if we have an incomplete character.
    Thus, 
      SELECT _ucs2 0xAA 
    will automatically be converted into
      SELECT _ucs2 0x00AA

  RETURN
    0  ok
    1  error
unknown's avatar
unknown committed
292 293
*/

unknown's avatar
unknown committed
294
bool String::copy_aligned(const char *str,uint32 arg_length, uint32 offset,
295
			  CHARSET_INFO *cs)
unknown's avatar
unknown committed
296 297
{
  /* How many bytes are in incomplete character */
298 299
  offset= cs->mbminlen - offset; /* How many zeros we should prepend */
  DBUG_ASSERT(offset && offset != cs->mbminlen);
unknown's avatar
unknown committed
300

unknown's avatar
unknown committed
301
  uint32 aligned_length= arg_length + offset;
unknown's avatar
unknown committed
302 303 304 305
  if (alloc(aligned_length))
    return TRUE;
  
  /*
unknown's avatar
unknown committed
306 307
    Note, this is only safe for big-endian UCS-2.
    If we add little-endian UCS-2 sometimes, this code
unknown's avatar
unknown committed
308
    will be more complicated. But it's OK for now.
unknown's avatar
unknown committed
309
  */
unknown's avatar
unknown committed
310 311
  bzero((char*) Ptr, offset);
  memcpy(Ptr + offset, str, arg_length);
unknown's avatar
unknown committed
312
  Ptr[aligned_length]=0;
unknown's avatar
unknown committed
313 314 315
  /* str_length is always >= 0 as arg_length is != 0 */
  str_length= aligned_length;
  str_charset= cs;
unknown's avatar
unknown committed
316 317 318
  return FALSE;
}

319 320 321 322 323

bool String::set_or_copy_aligned(const char *str,uint32 arg_length,
				 CHARSET_INFO *cs)
{
  /* How many bytes are in incomplete character */
unknown's avatar
unknown committed
324
  uint32 offset= (arg_length % cs->mbminlen); 
325
  
unknown's avatar
unknown committed
326
  if (!offset) /* All characters are complete, just copy */
327 328 329 330
  {
    set(str, arg_length, cs);
    return FALSE;
  }
unknown's avatar
unknown committed
331
  return copy_aligned(str, arg_length, offset, cs);
332 333
}

334 335 336 337 338 339 340 341 342 343

/**
   Copies the character data into this String, with optional character set
   conversion.

   @return
   FALSE ok
   TRUE  Could not allocate result buffer

*/
344

345
bool String::copy(const char *str, uint32 arg_length,
346
		  CHARSET_INFO *from_cs, CHARSET_INFO *to_cs, uint *errors)
347
{
unknown's avatar
unknown committed
348
  uint32 offset;
349 350

  DBUG_ASSERT(!str || str != Ptr);
351
  
unknown's avatar
unknown committed
352
  if (!needs_conversion(arg_length, from_cs, to_cs, &offset))
353
  {
354
    *errors= 0;
355
    return copy(str, arg_length, to_cs);
356
  }
unknown's avatar
unknown committed
357
  if ((from_cs == &my_charset_bin) && offset)
358
  {
359
    *errors= 0;
unknown's avatar
unknown committed
360
    return copy_aligned(str, arg_length, offset, to_cs);
361
  }
362
  uint32 new_length= to_cs->mbmaxlen*arg_length;
363 364
  if (alloc(new_length))
    return TRUE;
365
  str_length=copy_and_convert((char*) Ptr, new_length, to_cs,
366
                              str, arg_length, from_cs, errors);
367 368 369
  str_charset=to_cs;
  return FALSE;
}
370

371 372 373

/*
  Set a string to the value of a latin1-string, keeping the original charset
374
  
375 376 377 378
  SYNOPSIS
    set_ascii()
    str			String of a simple charset (latin1)
    arg_length		Length of string
379

380 381 382 383 384 385 386 387 388 389 390
  IMPLEMENTATION
    If string object is of a simple character set, set it to point to the
    given string.
    If not, make a copy and convert it to the new character set.

  RETURN
    0	ok
    1	Could not allocate result buffer

*/

391
bool String::set_ascii(const char *str, uint32 arg_length)
392
{
unknown's avatar
unknown committed
393
  if (str_charset->mbminlen == 1)
394 395 396
  {
    set(str, arg_length, str_charset);
    return 0;
397
  }
398 399
  uint dummy_errors;
  return copy(str, arg_length, &my_charset_latin1, str_charset, &dummy_errors);
400 401
}

402

unknown's avatar
unknown committed
403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420
/* This is used by mysql.cc */

/*
  Make the string exactly 'max_length' bytes long.

  A longer string is cut and zero-terminated at 'max_length'; a shorter
  one is extended with 'fill_char' after realloc().

  RETURN
    FALSE  ok
    TRUE   realloc() failed
*/
bool String::fill(uint32 max_length,char fill_char)
{
  if (str_length > max_length)
  {
    str_length= max_length;
    Ptr[max_length]= 0;
    return FALSE;
  }

  if (realloc(max_length))
    return TRUE;
  bfill(Ptr + str_length, max_length - str_length, fill_char);
  str_length= max_length;
  return FALSE;
}

void String::strip_sp()
{
421
   while (str_length && my_isspace(str_charset,Ptr[str_length-1]))
unknown's avatar
unknown committed
422 423 424 425 426
    str_length--;
}

bool String::append(const String &s)
{
427 428
  if (s.length())
  {
429
    if (realloc_with_extra_if_needed(str_length+s.length()))
430 431 432 433
      return TRUE;
    memcpy(Ptr+str_length,s.ptr(),s.length());
    str_length+=s.length();
  }
unknown's avatar
unknown committed
434 435 436
  return FALSE;
}

437 438

/*
439
  Append an ASCII string to a string of the current character set
440 441
*/

unknown's avatar
unknown committed
442 443
bool String::append(const char *s,uint32 arg_length)
{
444 445 446 447 448 449 450
  if (!arg_length)
    return FALSE;

  /*
    For an ASCII incompatible string, e.g. UCS-2, we need to convert
  */
  if (str_charset->mbminlen > 1)
451 452
  {
    uint32 add_length=arg_length * str_charset->mbmaxlen;
453
    uint dummy_errors;
454
    if (realloc_with_extra_if_needed(str_length+ add_length))
455 456
      return TRUE;
    str_length+= copy_and_convert(Ptr+str_length, add_length, str_charset,
457 458
				  s, arg_length, &my_charset_latin1,
                                  &dummy_errors);
459 460
    return FALSE;
  }
461 462 463 464

  /*
    For an ASCII compatinble string we can just append.
  */
465
  if (realloc_with_extra_if_needed(str_length+arg_length))
unknown's avatar
unknown committed
466 467 468 469 470 471
    return TRUE;
  memcpy(Ptr+str_length,s,arg_length);
  str_length+=arg_length;
  return FALSE;
}

472

473 474 475 476 477 478
/*
  Append a 0-terminated ASCII string
*/

bool String::append(const char *s)
{
479
  return append(s, (uint) strlen(s));
480 481 482
}


483 484 485 486 487 488 489 490 491 492

/*
  Append the base-10 text of 'val' (as produced by longlong10_to_str).

  RETURN
    FALSE  ok
    TRUE   realloc() failed
*/
bool String::append_ulonglong(ulonglong val)
{
  if (realloc(str_length + MAX_BIGINT_WIDTH + 2))
    return TRUE;

  char *write_pos= (char*) Ptr + str_length;
  char *end= (char*) longlong10_to_str(val, write_pos, 10);
  str_length= end - Ptr;                        /* 'end' is one past the text */
  return FALSE;
}

493 494 495 496 497 498 499
/*
  Append a string in the given charset to the string
  with character set recoding
*/

bool String::append(const char *s,uint32 arg_length, CHARSET_INFO *cs)
{
Alexander Barkov's avatar
Alexander Barkov committed
500
  uint32 offset;
501
  
Alexander Barkov's avatar
Alexander Barkov committed
502
  if (needs_conversion(arg_length, cs, str_charset, &offset))
503
  {
Alexander Barkov's avatar
Alexander Barkov committed
504 505 506 507 508 509 510 511 512 513 514 515 516 517 518
    uint32 add_length;
    if ((cs == &my_charset_bin) && offset)
    {
      DBUG_ASSERT(str_charset->mbminlen > offset);
      offset= str_charset->mbminlen - offset; // How many characters to pad
      add_length= arg_length + offset;
      if (realloc(str_length + add_length))
        return TRUE;
      bzero((char*) Ptr + str_length, offset);
      memcpy(Ptr + str_length + offset, s, arg_length);
      str_length+= add_length;
      return FALSE;
    }

    add_length= arg_length / cs->mbminlen * str_charset->mbmaxlen;
519
    uint dummy_errors;
520
    if (realloc_with_extra_if_needed(str_length + add_length)) 
521 522
      return TRUE;
    str_length+= copy_and_convert(Ptr+str_length, add_length, str_charset,
523
				  s, arg_length, cs, &dummy_errors);
524
  }
525 526
  else
  {
527
    if (realloc_with_extra_if_needed(str_length + arg_length)) 
528
      return TRUE;
529 530 531
    memcpy(Ptr + str_length, s, arg_length);
    str_length+= arg_length;
  }
532 533 534
  return FALSE;
}

535 536
/*
  Append 'arg_length' bytes read from an IO_CACHE with my_b_read().

  RETURN
    FALSE  ok
    TRUE   could not extend the buffer, or my_b_read() reported an error
           (shrink(str_length) is then called; the length is unchanged)
*/
bool String::append(IO_CACHE* file, uint32 arg_length)
{
  if (realloc_with_extra_if_needed(str_length + arg_length))
    return TRUE;

  if (!my_b_read(file, (uchar*) Ptr + str_length, arg_length))
  {
    str_length+= arg_length;
    return FALSE;
  }

  shrink(str_length);
  return TRUE;
}
unknown's avatar
unknown committed
547

548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565

/**
  Append a parenthesized number to String.
  Used in various pieces of SHOW related code.

  @param nr     Number
  @param radix  Radix, optional parameter, 10 by default.
*/
bool String::append_parenthesized(long nr, int radix)
{
  char buff[64];

  /* Build "(<number>)" in a local buffer and append it in one call */
  buff[0]= '(';
  char *pos= int10_to_str(nr, buff + 1, radix);
  *pos= ')';
  pos++;
  return append(buff, (uint) (pos - buff));
}


unknown's avatar
unknown committed
566 567 568 569 570
bool String::append_with_prefill(const char *s,uint32 arg_length,
		 uint32 full_length, char fill_char)
{
  int t_length= arg_length > full_length ? arg_length : full_length;

571
  if (realloc_with_extra_if_needed(str_length + t_length))
unknown's avatar
unknown committed
572 573 574 575 576 577 578 579 580 581 582
    return TRUE;
  t_length= full_length - arg_length;
  if (t_length > 0)
  {
    bfill(Ptr+str_length, t_length, fill_char);
    str_length=str_length + t_length;
  }
  append(s, arg_length);
  return FALSE;
}

583
/*
  Return the result of the character set's numchars() handler applied to
  the string's bytes [Ptr, Ptr + str_length).
*/
uint32 String::numchars() const
{
  const char *begin= Ptr;
  const char *end= Ptr + str_length;
  return str_charset->cset->numchars(str_charset, begin, end);
}

Sergei Golubchik's avatar
Sergei Golubchik committed
588
/*
  Ask the character set's charpos() handler for character number 'i',
  counting from byte 'offset' of the string.

  Non-positive 'i' is returned unchanged (truncated to int) without
  consulting the character set.
*/
int String::charpos(longlong i,uint32 offset)
{
  if (i > 0)
    return (int) str_charset->cset->charpos(str_charset, Ptr + offset,
                                            Ptr + str_length, (size_t) i);
  return (int) i;
}

int String::strstr(const String &s,uint32 offset)
{
  if (s.length()+offset <= str_length)
  {
    if (!s.length())
unknown's avatar
unknown committed
600
      return ((int) offset);	// Empty string is always found
unknown's avatar
unknown committed
601 602 603 604 605

    register const char *str = Ptr+offset;
    register const char *search=s.ptr();
    const char *end=Ptr+str_length-s.length()+1;
    const char *search_end=s.ptr()+s.length();
606
skip:
unknown's avatar
unknown committed
607 608 609 610 611 612 613
    while (str != end)
    {
      if (*str++ == *search)
      {
	register char *i,*j;
	i=(char*) str; j=(char*) search+1;
	while (j != search_end)
614
	  if (*i++ != *j++) goto skip;
unknown's avatar
unknown committed
615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636
	return (int) (str-Ptr) -1;
      }
    }
  }
  return -1;
}

/*
** Search string from end. Offset is offset to the end of string
*/

int String::strrstr(const String &s,uint32 offset)
{
  if (s.length() <= offset && offset <= str_length)
  {
    if (!s.length())
      return offset;				// Empty string is always found
    register const char *str = Ptr+offset-1;
    register const char *search=s.ptr()+s.length()-1;

    const char *end=Ptr+s.length()-2;
    const char *search_end=s.ptr()-1;
637
skip:
unknown's avatar
unknown committed
638 639 640 641 642 643 644
    while (str != end)
    {
      if (*str-- == *search)
      {
	register char *i,*j;
	i=(char*) str; j=(char*) search-1;
	while (j != search_end)
645
	  if (*i-- != *j--) goto skip;
unknown's avatar
unknown committed
646 647 648 649 650 651 652 653
	return (int) (i-Ptr) +1;
      }
    }
  }
  return -1;
}

/*
654 655
  Replace substring with string
  If wrong parameter or not enough memory, do nothing
unknown's avatar
unknown committed
656 657 658 659
*/

bool String::replace(uint32 offset,uint32 arg_length,const String &to)
{
  /* Delegate to the pointer/length overload */
  const char *to_ptr= to.ptr();
  const uint32 to_length= to.length();
  return replace(offset, arg_length, to_ptr, to_length);
}

bool String::replace(uint32 offset,uint32 arg_length,
664
                     const char *to, uint32 to_length)
665
{
666
  long diff = (long) to_length-(long) arg_length;
unknown's avatar
unknown committed
667 668 669 670
  if (offset+arg_length <= str_length)
  {
    if (diff < 0)
    {
671 672 673
      if (to_length)
	memcpy(Ptr+offset,to,to_length);
      bmove(Ptr+offset+to_length,Ptr+offset+arg_length,
unknown's avatar
unknown committed
674 675 676 677 678 679
	    str_length-offset-arg_length);
    }
    else
    {
      if (diff)
      {
680
	if (realloc_with_extra_if_needed(str_length+(uint32) diff))
unknown's avatar
unknown committed
681
	  return TRUE;
682
	bmove_upp((uchar*) Ptr+str_length+diff, (uchar*) Ptr+str_length,
unknown's avatar
unknown committed
683 684
		  str_length-offset-arg_length);
      }
685 686
      if (to_length)
	memcpy(Ptr+offset,to,to_length);
unknown's avatar
unknown committed
687 688 689 690 691 692
    }
    str_length+=(uint32) diff;
  }
  return FALSE;
}

693

unknown's avatar
unknown committed
694 695 696 697 698
// added by Holyfoot for "geometry" needs
/*
  Make sure at least 'space_needed' more bytes fit after the current data.

  When the allocation is too small it is enlarged through realloc() by the
  larger of 'space_needed' and 'grow_by'.

  RETURN
    FALSE  ok
    TRUE   realloc() failed
*/
int String::reserve(uint32 space_needed, uint32 grow_by)
{
  if (Alloced_length >= str_length + space_needed)
    return FALSE;

  if (realloc(Alloced_length + MY_MAX(space_needed, grow_by) - 1))
    return TRUE;
  return FALSE;
}

unknown's avatar
unknown committed
705
void String::qs_append(const char *str, uint32 len)
unknown's avatar
unknown committed
706 707 708 709 710 711 712 713
{
  memcpy(Ptr + str_length, str, len + 1);
  str_length += len;
}

void String::qs_append(double d)
{
  char *buff = Ptr + str_length;
714 715
  str_length+= my_gcvt(d, MY_GCVT_ARG_DOUBLE, FLOATING_POINT_BUFFER - 1, buff,
                       NULL);
unknown's avatar
unknown committed
716 717 718 719 720
}

void String::qs_append(double *d)
{
  double ld;
unknown's avatar
unknown committed
721
  float8get(ld, (char*) d);
unknown's avatar
unknown committed
722 723 724
  qs_append(ld);
}

725 726
void String::qs_append(int i)
{
unknown's avatar
unknown committed
727 728 729
  char *buff= Ptr + str_length;
  char *end= int10_to_str(i, buff, -10);
  str_length+= (int) (end-buff);
730 731
}

732
/*
  Append the text of a ulonglong, written by longlong10_to_str() with
  radix 10, directly at the end of the buffer. No allocation is done here.
*/
void String::qs_append(ulonglong i)
{
  char *start= Ptr + str_length;
  char *finish= longlong10_to_str(i, start, 10);
  str_length+= (int) (finish - start);
}

unknown's avatar
unknown committed
739 740 741 742 743 744 745 746 747 748 749
/*
  Compare strings according to collation, without end space.

  SYNOPSIS
    sortcmp()
    s		First string
    t		Second string
    cs		Collation

  NOTE:
    Normally this is case sensitive comparison
unknown's avatar
unknown committed
750

unknown's avatar
unknown committed
751 752 753 754 755 756 757 758
  RETURN
  < 0	s < t
  0	s == t
  > 0	s > t
*/


int sortcmp(const String *s,const String *t, CHARSET_INFO *cs)
unknown's avatar
unknown committed
759
{
unknown's avatar
unknown committed
760
 return cs->coll->strnncollsp(cs,
unknown's avatar
unknown committed
761 762
                              (uchar *) s->ptr(),s->length(),
                              (uchar *) t->ptr(),t->length(), 0);
unknown's avatar
unknown committed
763 764 765 766 767 768 769 770 771 772 773 774
}


/*
  Compare strings byte by byte. End spaces are also compared.

  SYNOPSIS
    stringcmp()
    s		First string
    t		Second string

  NOTE:
unknown's avatar
unknown committed
775
    Strings are compared as a stream of uchars
unknown's avatar
unknown committed
776 777 778 779 780 781 782 783 784

  RETURN
  < 0	s < t
  0	s == t
  > 0	s > t
*/


int stringcmp(const String *s,const String *t)
unknown's avatar
unknown committed
785
{
786
  uint32 s_len=s->length(),t_len=t->length(),len=MY_MIN(s_len,t_len);
unknown's avatar
unknown committed
787 788
  int cmp= memcmp(s->ptr(), t->ptr(), len);
  return (cmp) ? cmp : (int) (s_len - t_len);
unknown's avatar
unknown committed
789 790 791 792 793 794 795
}


/*
  Return a String whose buffer can hold at least "from_length" bytes and
  which contains the data of "from".

  - If the buffer of "from" is already big enough, "from" is returned as is.
  - If "from" has its own allocated buffer, or there is no usable separate
    "to" (NULL or the same object), "from" is reallocated in place and
    returned; the result of that realloc() is intentionally not checked here.
  - Otherwise "to" is reallocated, up to "from_length" bytes of "from" are
    copied into it together with the character set, and "to" is returned.
    If reallocating "to" fails, "from" is returned instead.
*/
String *copy_if_not_alloced(String *to,String *from,uint32 from_length)
{
  if (from->Alloced_length >= from_length)
    return from;

  const bool from_has_own_buffer= from->alloced && (from->Alloced_length != 0);
  if (from_has_own_buffer || !to || from == to)
  {
    (void) from->realloc(from_length);
    return from;
  }

  if (to->realloc(from_length))
    return from;				// Actually an error

  to->str_length= MY_MIN(from->str_length, from_length);
  if (to->str_length)
    memcpy(to->Ptr, from->Ptr, to->str_length);
  to->str_charset= from->str_charset;
  return to;
}


810 811 812 813
/****************************************************************************
  Help functions
****************************************************************************/

814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875
/**
  Copy a string, HEX-encoding "bad" bytes.

  @details Bytes are read from "src" (at most "srclen" of them) and written
  to "dst" (at most "dstlen" of them). On every step:
  - a multi-byte character recognized by my_ismbchar() for "cs" is copied
    as is;
  - otherwise a byte with the high bit set is written as the four
    characters "\xXX" (upper case hex digits);
  - otherwise the byte is copied as is.
  Copying stops as soon as the next piece does not fit into the remaining
  space of "dst". No terminating zero byte is written.

   @param      cs       character set pointer of the destination string
   @param[out] dst      destination string
   @param      dstlen   size of dst
   @param      src      source string
   @param      srclen   length of src

   @retval     number of bytes written to dst
*/

size_t
my_copy_with_hex_escaping(CHARSET_INFO *cs,
                          char *dst, size_t dstlen,
                          const char *src, size_t srclen)
{
  const char *const src_end= src + srclen;
  char *const dst_start= dst;

  while (src < src_end)
  {
    const size_t mb_len= my_ismbchar(cs, src, src_end);

    if (mb_len)
    {
      /* Well-formed multi-byte character: copy unchanged */
      if (dstlen < mb_len)
        break; /* purecov: inspected */
      memcpy(dst, src, mb_len);
      src+= mb_len;
      dst+= mb_len;
      dstlen-= mb_len;
      continue;
    }

    if (*src & 0x80)
    {
      /* High-bit byte that is not part of a multi-byte character: \xXX */
      if (dstlen < 4)
        break; /* purecov: inspected */
      const unsigned char bad_byte= (unsigned char) *src;
      dst[0]= '\\';
      dst[1]= 'x';
      dst[2]= _dig_vec_upper[bad_byte >> 4];
      dst[3]= _dig_vec_upper[bad_byte & 15];
      dst+= 4;
      src++;
      dstlen-= 4;
      continue;
    }

    /* Plain 7-bit byte */
    if (dstlen < 1)
      break; /* purecov: inspected */
    *dst++= *src++;
    dstlen--;
  }
  return dst - dst_start;
}

876

877
/*
878
  Copy a string,
879 880
  with optional character set conversion,
  with optional left padding (for binary -> UCS2 conversion)
881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901

  In case if there is a Unicode conversion (i.e. to_cs and from_cs are
  different character sets and both are not &my_charset_bin), bad input bytes
  as well as characters that cannot be encoded in to_cs are replaced to '?'.

  In case of non-Unicode copying (i.e. to_cs and from_cs are same character set,
  or from_cs is &my_charset_bin),  the function stops on the first bad
  byte sequence.

  The string that is written to "to" is always well-formed.

  @param to                  The destination string
  @param to_length           Space available in "to"
  @param to_cs               Character set of the "to" string
  @param from                The source string
  @param from_length         Length of the "from" string
  @param from_cs             Character set of the "from" string
  @param nchars              Copy not more than "nchars" characters

  The members as set as follows:
  m_well_formed_error_pos    To the position when "from" is not well formed
902
                             or NULL otherwise.
903
  m_cannot_convert_error_pos To the position where a not convertable
904
                             character met, or NULL otherwise.
905
  m_source_end_pos           To the position where scanning of the "from"
906 907
                             string stopped.

908
  @returns                   number of bytes that were written to 'to'
909
*/
910 911 912 913 914 915
uint
String_copier::well_formed_copy(CHARSET_INFO *to_cs,
                                char *to, uint to_length,
                                CHARSET_INFO *from_cs,
                                const char *from, uint from_length,
                                uint nchars)
916 917 918 919 920 921 922 923 924 925
{
  uint res;

  if ((to_cs == &my_charset_bin) || 
      (from_cs == &my_charset_bin) ||
      (to_cs == from_cs) ||
      my_charset_same(from_cs, to_cs))
  {
    if (to_length < to_cs->mbminlen || !nchars)
    {
926 927 928
      m_source_end_pos= from;
      m_cannot_convert_error_pos= NULL;
      m_well_formed_error_pos= NULL;
929 930 931 932 933
      return 0;
    }

    if (to_cs == &my_charset_bin)
    {
934
      res= MY_MIN(MY_MIN(nchars, to_length), from_length);
935
      memmove(to, from, res);
936 937 938
      m_source_end_pos= from + res;
      m_well_formed_error_pos= NULL;
      m_cannot_convert_error_pos= NULL;
939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955
    }
    else
    {
      int well_formed_error;
      uint from_offset;

      if ((from_offset= (from_length % to_cs->mbminlen)) &&
          (from_cs == &my_charset_bin))
      {
        /*
          Copying from BINARY to UCS2 needs to prepend zeros sometimes:
          INSERT INTO t1 (ucs2_column) VALUES (0x01);
          0x01 -> 0x0001
        */
        uint pad_length= to_cs->mbminlen - from_offset;
        bzero(to, pad_length);
        memmove(to + pad_length, from, from_offset);
Alexander Barkov's avatar
Alexander Barkov committed
956 957 958 959 960 961 962 963 964 965 966 967 968 969
        /*
          In some cases left zero-padding can create an incorrect character.
          For example:
            INSERT INTO t1 (utf32_column) VALUES (0x110000);
          We'll pad the value to 0x00110000, which is a wrong UTF32 sequence!
          The valid characters range is limited to 0x00000000..0x0010FFFF.
          
          Make sure we didn't pad to an incorrect character.
        */
        if (to_cs->cset->well_formed_len(to_cs,
                                         to, to + to_cs->mbminlen, 1,
                                         &well_formed_error) !=
                                         to_cs->mbminlen)
        {
970 971
          m_source_end_pos= m_well_formed_error_pos= from;
          m_cannot_convert_error_pos= NULL;
Alexander Barkov's avatar
Alexander Barkov committed
972 973
          return 0;
        }
974 975 976 977 978 979 980 981 982 983 984
        nchars--;
        from+= from_offset;
        from_length-= from_offset;
        to+= to_cs->mbminlen;
        to_length-= to_cs->mbminlen;
      }

      set_if_smaller(from_length, to_length);
      res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
                                        nchars, &well_formed_error);
      memmove(to, from, res);
985 986 987
      m_source_end_pos= from + res;
      m_well_formed_error_pos= well_formed_error ? from + res : NULL;
      m_cannot_convert_error_pos= NULL;
988 989 990 991 992 993 994 995
      if (from_offset)
        res+= to_cs->mbminlen;
    }
  }
  else
  {
    int cnvres;
    my_wc_t wc;
996 997
    my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
    my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
998 999 1000
    const uchar *from_end= (const uchar*) from + from_length;
    uchar *to_end= (uchar*) to + to_length;
    char *to_start= to;
1001 1002
    m_well_formed_error_pos= NULL;
    m_cannot_convert_error_pos= NULL;
1003 1004 1005 1006 1007 1008 1009 1010

    for ( ; nchars; nchars--)
    {
      const char *from_prev= from;
      if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from, from_end)) > 0)
        from+= cnvres;
      else if (cnvres == MY_CS_ILSEQ)
      {
1011 1012
        if (!m_well_formed_error_pos)
          m_well_formed_error_pos= from;
1013 1014 1015 1016 1017 1018 1019 1020 1021
        from++;
        wc= '?';
      }
      else if (cnvres > MY_CS_TOOSMALL)
      {
        /*
          A correct multibyte sequence detected
          But it doesn't have Unicode mapping.
        */
1022 1023
        if (!m_cannot_convert_error_pos)
          m_cannot_convert_error_pos= from;
1024 1025 1026 1027
        from+= (-cnvres);
        wc= '?';
      }
      else
1028 1029 1030 1031
      {
        if ((uchar *) from >= from_end)
          break; // End of line
        // Incomplete byte sequence
1032 1033
        if (!m_well_formed_error_pos)
          m_well_formed_error_pos= from;
1034 1035 1036
        from++;
        wc= '?';
      }
1037 1038 1039 1040 1041
outp:
      if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
        to+= cnvres;
      else if (cnvres == MY_CS_ILUNI && wc != '?')
      {
1042 1043
        if (!m_cannot_convert_error_pos)
          m_cannot_convert_error_pos= from_prev;
1044 1045 1046 1047
        wc= '?';
        goto outp;
      }
      else
1048 1049
      {
        from= from_prev;
1050
        break;
1051
      }
1052
    }
1053
    m_source_end_pos= from;
1054
    res= (uint) (to - to_start);
1055
  }
1056
  return res;
1057 1058 1059 1060
}



unknown's avatar
unknown committed
1061 1062
/*
  Append characters to a single-quoted string '...', escaping special
1063
  characters with backslashes as necessary.
unknown's avatar
unknown committed
1064 1065
  Does not add the enclosing quotes, this is left up to caller.
*/
1066 1067
#define APPEND(X)   if (append(X)) return 1; else break
bool String::append_for_single_quote(const char *st, uint len)
unknown's avatar
unknown committed
1068
{
unknown's avatar
unknown committed
1069
  const char *end= st+len;
1070
  for (; st < end; st++)
unknown's avatar
unknown committed
1071 1072 1073 1074
  {
    uchar c= *st;
    switch (c)
    {
1075 1076 1077 1078 1079 1080 1081
    case '\\':   APPEND(STRING_WITH_LEN("\\\\"));
    case '\0':   APPEND(STRING_WITH_LEN("\\0"));
    case '\'':   APPEND(STRING_WITH_LEN("\\'"));
    case '\n':   APPEND(STRING_WITH_LEN("\\n"));
    case '\r':   APPEND(STRING_WITH_LEN("\\r"));
    case '\032': APPEND(STRING_WITH_LEN("\\Z"));
    default:     APPEND(c);
unknown's avatar
unknown committed
1082 1083
    }
  }
1084
  return 0;
unknown's avatar
unknown committed
1085
}
1086

1087
/*
  Append the contents of this string to "str", escaped for use inside
  a single-quoted literal (see append_for_single_quote()).
  The result of the append is not reported to the caller.
*/
void String::print(String *str) const
{
  const char *text= Ptr;
  const uint32 text_length= str_length;
  str->append_for_single_quote(text, text_length);
}
1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109

/*
  Exchange the state of this object with the state of the argument.

  SYNOPSIS
    String::swap()
    s		String to exchange state with

  RETURN
    Target string will contain state of this object and vice versa.
    The buffer pointer, both lengths, the "alloced" flag and the
    character set are exchanged member by member; no data is copied.
*/

void String::swap(String &s)
{
  /* Character set and ownership flag */
  swap_variables(CHARSET_INFO*, str_charset, s.str_charset);
  swap_variables(bool, alloced, s.alloced);
  /* Buffer and its sizes */
  swap_variables(uint32, Alloced_length, s.Alloced_length);
  swap_variables(uint32, str_length, s.str_length);
  swap_variables(char *, Ptr, s.Ptr);
}
1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141


/**
  Convert string to printable ASCII string

  @details This function converts input string "from" replacing non-ASCII bytes
  with hexadecimal sequences ("\xXX") optionally appending "..." to the end of
  the resulting string.
  "..." is appended whenever not all "from_len" input bytes were converted,
  either because of the "nbytes" limit or because the output buffer is full.
  The output is always terminated with a zero byte.
  This function is used in the ER_TRUNCATED_WRONG_VALUE_FOR_FIELD error
  messages, e.g. when a string cannot be converted to a result charset.


  @param    to          output buffer
  @param    to_len      size of the output buffer (8 bytes or greater)
  @param    from        input string
  @param    from_len    size of the input string
  @param    from_cs     input charset
  @param    nbytes      maximal number of bytes to convert (from_len if 0)

  @return   number of bytes in the output string, not counting the
            terminating zero byte but including the "..." if it was appended
*/

uint convert_to_printable(char *to, size_t to_len,
                          const char *from, size_t from_len,
                          CHARSET_INFO *from_cs, size_t nbytes /*= 0*/)
{
  /* needs at least 8 bytes for '\xXX...' and zero byte */
  DBUG_ASSERT(to_len >= 8);

  char *t= to;
  char *t_end= to + to_len - 1; // '- 1' is for the '\0' at the end
  char *dots= to; // last safe place to append '...'

  /* Check "from" before doing any pointer arithmetic on it */
  if (!from || t == t_end)
    return 0;

  const char *f= from;
  const char *f_end= from + (nbytes ? MY_MIN(from_len, nbytes) : from_len);

  for (; t < t_end && f < f_end; f++)
  {
    /*
      If the source string is ASCII compatible (mbminlen==1)
      and the source character is in ASCII printable range (0x20..0x7F),
      then display the character as is.

      Otherwise, if the source string is not ASCII compatible (e.g. UCS2),
      or the source character is not in the printable range,
      then print the character using HEX notation.
    */
    if (((unsigned char) *f) >= 0x20 &&
        ((unsigned char) *f) <= 0x7F &&
        from_cs->mbminlen == 1)
    {
      *t++= *f;
    }
    else
    {
      if (t_end - t < 4) // \xXX
        break;
      *t++= '\\';
      *t++= 'x';
      *t++= _dig_vec_upper[((unsigned char) *f) >> 4];
      *t++= _dig_vec_upper[((unsigned char) *f) & 0x0F];
    }
    if (t_end - t >= 3) // '...'
      dots= t;
  }
  if (f < from + from_len)
  {
    /*
      Input was truncated. The dots (and the zero byte) may overwrite
      characters already written after "dots", so the output string ends
      right after them and not at "t".
    */
    memcpy(dots, STRING_WITH_LEN("...\0"));
    t= dots + 3;
  }
  else
    *t= '\0';
  return t - to;
}