Commit 6eae037c authored by Alexander Barkov

MDEV-17474 Change Unicode collation implementation from "handler" to "inline" style

parent fee24b12
......@@ -362,7 +362,6 @@ extern MY_COLLATION_HANDLER my_collation_8bit_bin_handler;
extern MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler;
extern MY_COLLATION_HANDLER my_collation_8bit_nopad_bin_handler;
extern MY_COLLATION_HANDLER my_collation_8bit_simple_nopad_ci_handler;
extern MY_COLLATION_HANDLER my_collation_ucs2_uca_handler;
/* Some typedef to make it easy for C++ to make function pointers */
typedef int (*my_charset_conv_mb_wc)(CHARSET_INFO *, my_wc_t *,
......
......@@ -31158,17 +31158,6 @@ typedef struct my_uca_scanner_st
CHARSET_INFO *cs;
} my_uca_scanner;
/*
Charset dependent scanner part, to optimize
some character sets.
*/
typedef struct my_uca_scanner_handler_st
{
/* Prepare "scanner" to scan "length" bytes starting at "str" on weight level "level" */
void (*init)(my_uca_scanner *scanner, CHARSET_INFO *cs,
const MY_UCA_WEIGHT_LEVEL *level,
const uchar *str, size_t length);
/* Return the next weight; the callers in this file stop scanning on a result <= 0 */
int (*next)(my_uca_scanner *scanner);
} my_uca_scanner_handler;
/*
  An empty (zero-only) weight string. my_uca_scanner_next_any() points
  scanner->wbeg here when there are no pending weights, and compares
  wbeg against it to detect "no previous character".
*/
static const uint16 nochar[]= {0,0};
......@@ -31675,223 +31664,6 @@ my_uca_scanner_init_any(my_uca_scanner *scanner,
scanner->cs= cs;
}
/*
  Return the next weight of the string being scanned.

  Uses the character set's mb_wc() to decode characters, so it does not
  depend on a particular encoding.

  RETURN
    -1      no more bytes in the string
    0xFFFF  a bad or incomplete byte sequence was met
            (mbminlen bytes are consumed for it)
    0xFFFD  the character is greater than level->maxchar
    other   the next weight: a pending weight of the previous character,
            the first weight of a contraction / previous-context pair,
            an implicit weight, or the first weight of the character
*/
static int my_uca_scanner_next_any(my_uca_scanner *scanner)
{
/*
Check if the weights for the previous character have been
already fully scanned. If yes, then get the next character and
initialize wbeg and wlength to its weight string.
*/
if (scanner->wbeg[0]) /* More weights left from the previous step: */
return *scanner->wbeg++; /* return the next weight from expansion */
do
{
const uint16 *wpage;
my_wc_t wc[MY_UCA_MAX_CONTRACTION];
int mblen;
/* Get next character */
if (((mblen= scanner->cs->cset->mb_wc(scanner->cs, wc,
scanner->sbeg,
scanner->send)) <= 0))
{
if (scanner->sbeg >= scanner->send)
return -1; /* No more bytes, end of line reached */
/*
There are some more bytes left. Non-positive mblen means that
we got an incomplete or a bad byte sequence. Consume mbminlen bytes.
*/
if ((scanner->sbeg+= scanner->cs->mbminlen) > scanner->send)
{
/* For safety purposes don't go beyond the string range. */
scanner->sbeg= scanner->send;
}
/*
Treat every complete or incomplete mbminlen unit as a weight which is
greater than weight for any possible normal character.
0xFFFF is greater than any possible weight in the UCA weight table.
*/
return 0xFFFF;
}
scanner->sbeg+= mblen;
if (wc[0] > scanner->level->maxchar)
{
/*
Return 0xFFFD as weight for all characters above the maximum
character of this level (e.g. characters outside BMP).
*/
scanner->wbeg= nochar;
return 0xFFFD;
}
if (my_uca_have_contractions_quick(scanner->level))
{
uint16 *cweight;
/*
If we have scanned a character which can have previous context,
and there were some more characters already before,
then reconstruct codepoint of the previous character
from "page" and "code" into wc[1], and verify that {wc[1], wc[0]}
together form a real previous context pair.
Note, we support only 2-character long sequences with previous
context at the moment. CLDR does not have longer sequences.
*/
if (my_uca_can_be_previous_context_tail(&scanner->level->contractions,
wc[0]) &&
scanner->wbeg != nochar && /* if not the very first character */
my_uca_can_be_previous_context_head(&scanner->level->contractions,
(wc[1]= ((scanner->page << 8) +
scanner->code))) &&
(cweight= my_uca_previous_context_find(scanner, wc[1], wc[0])))
{
scanner->page= scanner->code= 0; /* Clear for the next character */
return *cweight;
}
else if (my_uca_can_be_contraction_head(&scanner->level->contractions,
wc[0]))
{
/* Check if wc[0] starts a contraction */
if ((cweight= my_uca_scanner_contraction_find(scanner, wc)))
return *cweight;
}
}
/* Process single character: remember it as "page" (high bits) and "code" (low byte) */
scanner->page= wc[0] >> 8;
scanner->code= wc[0] & 0xFF;
/* If weight page for wc[0] does not exist, then calculate algorithmically */
if (!(wpage= scanner->level->weights[scanner->page]))
return my_uca_scanner_next_implicit(scanner);
/* Calculate pointer to wc[0]'s weight, using page and offset */
scanner->wbeg= wpage +
scanner->code * scanner->level->lengths[scanner->page];
} while (!scanner->wbeg[0]); /* Skip ignorable characters */
return *scanner->wbeg++;
}
/*
  Scanner handler built from the "any" init/next functions, which decode
  characters through the character set's own mb_wc() callback.
*/
static my_uca_scanner_handler my_any_uca_scanner_handler=
{
my_uca_scanner_init_any,
my_uca_scanner_next_any
};
/*
Compares two strings according to the collation
SYNOPSIS:
my_strnncoll_uca()
cs Character set information
s First string
slen First string length
t Second string
tlen Second string length
level DUCET weight level
NOTES:
Initializes two weight scanners and gets weights
corresponding to two strings in a loop. If weights are not
the same at some step then returns their difference.
In the while() comparison these situations are possible:
1. (s_res>0) and (t_res>0) and (s_res == t_res)
Weights are the same so far, continue comparison
2. (s_res>0) and (t_res>0) and (s_res!=t_res)
A difference has been found, return.
3. (s_res>0) and (t_res<0)
We have reached the end of the second string, or found
an illegal multibyte sequence in the second string.
Return a positive number, i.e. the first string is bigger.
4. (s_res<0) and (t_res>0)
We have reached the end of the first string, or found
an illegal multibyte sequence in the first string.
Return a negative number, i.e. the second string is bigger.
5. (s_res<0) and (t_res<0)
Both scanners returned -1. It means we have reached
the end-of-string or an illegal sequence in both strings
at the same time. Return 0, strings are equal.
RETURN
Difference between two strings, according to the collation:
0 - means strings are equal
negative number - means the first string is smaller
positive number - means the first string is bigger
*/
/*
  Compare two strings on one weight level.

  Weights are fetched from both strings in lock-step until they differ
  or a scanner returns a non-positive value.

  RETURN
    0 if "t_is_prefix" is set and the second string was exhausted,
    otherwise the difference between the last two weights fetched.
*/
static int my_strnncoll_uca_onelevel(CHARSET_INFO *cs,
                                     my_uca_scanner_handler *scanner_handler,
                                     const MY_UCA_WEIGHT_LEVEL *level,
                                     const uchar *s, size_t slen,
                                     const uchar *t, size_t tlen,
                                     my_bool t_is_prefix)
{
  my_uca_scanner left, right;
  int lweight, rweight;

  scanner_handler->init(&left, cs, level, s, slen);
  scanner_handler->init(&right, cs, level, t, tlen);

  for (;;)
  {
    lweight= scanner_handler->next(&left);
    rweight= scanner_handler->next(&right);
    if (lweight != rweight || lweight <= 0)
      break;
  }

  if (t_is_prefix && rweight < 0)
    return 0;
  return lweight - rweight;
}
/*
  Compare two strings using only level[0] of the collation.
  Returns the result of my_strnncoll_uca_onelevel().
*/
static int my_strnncoll_uca(CHARSET_INFO *cs,
                            my_uca_scanner_handler *scanner_handler,
                            const uchar *s, size_t slen,
                            const uchar *t, size_t tlen,
                            my_bool t_is_prefix)
{
  int cmp;

  cmp= my_strnncoll_uca_onelevel(cs, scanner_handler, &cs->uca->level[0],
                                 s, slen, t, tlen, t_is_prefix);
  return cmp;
}
/*
  Compare two strings level by level, for cs->levels_for_order levels.
  Returns the first non-zero one-level result, or 0 if all levels are equal.
*/
static int my_strnncoll_uca_multilevel(CHARSET_INFO *cs,
                                       my_uca_scanner_handler *scanner_handler,
                                       const uchar *s, size_t slen,
                                       const uchar *t, size_t tlen,
                                       my_bool t_is_prefix)
{
  uint level_count= cs->levels_for_order;
  uint lvl= 0;
  int diff= 0;

  while (lvl != level_count && diff == 0)
  {
    diff= my_strnncoll_uca_onelevel(cs, scanner_handler, &cs->uca->level[lvl],
                                    s, slen, t, tlen, t_is_prefix);
    lvl++;
  }
  return diff;
}
/*
  NO PAD comparison for multi-level collations using the generic scanner.
  Every level is compared with my_strnncoll_uca_onelevel() and
  t_is_prefix = FALSE; the first non-zero result is returned.
*/
static int
my_strnncollsp_generic_uca_nopad_multilevel(CHARSET_INFO *cs,
                                            const uchar *s, size_t slen,
                                            const uchar *t, size_t tlen)
{
  uint level_count= cs->levels_for_order;
  uint lvl;
  int diff= 0;

  for (lvl= 0; lvl != level_count; lvl++)
  {
    diff= my_strnncoll_uca_onelevel(cs, &my_any_uca_scanner_handler,
                                    &cs->uca->level[lvl],
                                    s, slen, t, tlen, FALSE);
    if (diff != 0)
      break;
  }
  return diff;
}
static inline int
my_space_weight(const MY_UCA_WEIGHT_LEVEL *level)
......@@ -31924,258 +31696,6 @@ my_char_weight_addr(const MY_UCA_WEIGHT_LEVEL *level, uint wc)
}
/*
Compares two strings according to the collation,
ignoring trailing spaces.
SYNOPSIS:
my_strnncollsp_uca()
cs Character set information
s First string
slen First string length
t Second string
tlen Second string length
level DUCET weight level
NOTES:
Works exactly the same with my_strnncoll_uca(),
but ignores trailing spaces.
In the while() comparison these situations are possible:
1. (s_res>0) and (t_res>0) and (s_res == t_res)
Weights are the same so far, continue comparison
2. (s_res>0) and (t_res>0) and (s_res!=t_res)
A difference has been found, return.
3. (s_res>0) and (t_res<0)
We have reached the end of the second string, or found
an illegal multibyte sequence in the second string.
Compare the first string to an infinite array of
space characters until difference is found, or until
the end of the first string.
4. (s_res<0) and (t_res>0)
We have reached the end of the first string, or found
an illegal multibyte sequence in the first string.
Compare the second string to an infinite array of
space characters until difference is found or until
the end of the second string.
5. (s_res<0) and (t_res<0)
Both scanners returned -1. It means we have reached
the end-of-string or an illegal sequence in both strings
at the same time. Return 0, strings are equal.
RETURN
Difference between two strings, according to the collation:
0 - means strings are equal
negative number - means the first string is smaller
positive number - means the first string is bigger
*/
/*
  Compare two strings on one weight level, treating the shorter string
  as if it were followed by SPACE weights.

  After the common part, the remaining weights of the longer string are
  compared one by one against my_space_weight(level).

  RETURN
    0 if equal, otherwise the difference between the first two
    weights that differ.
*/
static int my_strnncollsp_uca_onelevel(CHARSET_INFO *cs,
                                       my_uca_scanner_handler *scanner_handler,
                                       const MY_UCA_WEIGHT_LEVEL *level,
                                       const uchar *s, size_t slen,
                                       const uchar *t, size_t tlen)
{
  my_uca_scanner left, right;
  int lweight, rweight;

  scanner_handler->init(&left, cs, level, s, slen);
  scanner_handler->init(&right, cs, level, t, tlen);

  /* Skip the part where both strings produce the same weights */
  for (;;)
  {
    lweight= scanner_handler->next(&left);
    rweight= scanner_handler->next(&right);
    if (lweight != rweight || lweight <= 0)
      break;
  }

  if (lweight > 0 && rweight < 0)
  {
    /* Second string ended: compare the rest of the first one to spaces */
    rweight= my_space_weight(level);
    for ( ; lweight > 0; lweight= scanner_handler->next(&left))
    {
      if (lweight != rweight)
        return lweight - rweight;
    }
    return 0;
  }

  if (lweight < 0 && rweight > 0)
  {
    /* First string ended: compare the rest of the second one to spaces */
    lweight= my_space_weight(level);
    for ( ; rweight > 0; rweight= scanner_handler->next(&right))
    {
      if (lweight != rweight)
        return lweight - rweight;
    }
    return 0;
  }

  return lweight - rweight;
}
/*
  Space-padded comparison using only level[0] of the collation.
  Returns the result of my_strnncollsp_uca_onelevel().
*/
static int my_strnncollsp_uca(CHARSET_INFO *cs,
                              my_uca_scanner_handler *scanner_handler,
                              const uchar *s, size_t slen,
                              const uchar *t, size_t tlen)
{
  int cmp;

  cmp= my_strnncollsp_uca_onelevel(cs, scanner_handler, &cs->uca->level[0],
                                   s, slen, t, tlen);
  return cmp;
}
/*
  Space-padded comparison, level by level, for cs->levels_for_order levels.
  Returns the first non-zero one-level result, or 0 if all levels are equal.
*/
static int my_strnncollsp_uca_multilevel(CHARSET_INFO *cs,
                                         my_uca_scanner_handler *scanner_handler,
                                         const uchar *s, size_t slen,
                                         const uchar *t, size_t tlen)
{
  uint level_count= cs->levels_for_order;
  uint lvl= 0;
  int diff= 0;

  while (lvl != level_count && diff == 0)
  {
    diff= my_strnncollsp_uca_onelevel(cs, scanner_handler,
                                      &cs->uca->level[lvl], s, slen, t, tlen);
    lvl++;
  }
  return diff;
}
/*
Calculates hash value for the given string,
according to the collation, and ignoring trailing spaces.
SYNOPSIS:
my_hash_sort_uca()
cs Character set information
s String
slen String's length
n1 First hash parameter
n2 Second hash parameter
NOTES:
Scans weights consecutively and updates
hash parameters n1 and n2. In a case insensitive collation,
upper and lower case of the same letter will return the same
weight sequence, and thus will produce the same hash values
in n1 and n2.
This function is used for one-level and for multi-level collations.
We intentionally use only primary level in multi-level collations.
This helps to have PARTITION BY KEY put primarily equal records
into the same partition. E.g. in utf8_thai_520_ci records that differ
only in tone marks go into the same partition.
RETURN
N/A
*/
/*
  Update the hash values *nr1 and *nr2 with the level[0] weights of a
  string, ignoring trailing spaces.

  Runs of SPACE weights are counted first and hashed only if a
  non-space weight follows them, so spaces at the end of the string
  do not affect the hash.
*/
static void my_hash_sort_uca(CHARSET_INFO *cs,
                             my_uca_scanner_handler *scanner_handler,
                             const uchar *s, size_t slen,
                             ulong *nr1, ulong *nr2)
{
  my_uca_scanner scanner;
  int weight;
  int space_weight= my_space_weight(&cs->uca->level[0]);
  ulong m1= *nr1, m2= *nr2;

  scanner_handler->init(&scanner, cs, &cs->uca->level[0], s, slen);

  for (;;)
  {
    weight= scanner_handler->next(&scanner);
    if (weight <= 0)
      break;

    if (weight == space_weight)
    {
      /* Count the whole run of spaces before deciding whether to hash it */
      uint nspaces= 0;
      do
      {
        nspaces++;
        weight= scanner_handler->next(&scanner);
      } while (weight == space_weight);

      if (weight <= 0)
        break;                              /* Spaces at end of string: skip */

      /* Something follows the spaces: add the hash for them back */
      for ( ; nspaces != 0; nspaces--)
      {
        /*
          MY_HASH_ADD_16() cannot be used here: because of a mistake in
          the original code the two bytes were added in the opposite
          order. Changing this would cause old partitioned tables to fail.
        */
        MY_HASH_ADD(m1, m2, space_weight >> 8);
        MY_HASH_ADD(m1, m2, space_weight & 0xFF);
      }
    }

    /* Same byte order as above, for the same compatibility reason */
    MY_HASH_ADD(m1, m2, weight >> 8);
    MY_HASH_ADD(m1, m2, weight & 0xFF);
  }

  *nr1= m1;
  *nr2= m2;
}
/*
  Update the hash values *nr1 and *nr2 with every level[0] weight of a
  string. Unlike my_hash_sort_uca(), spaces get no special treatment.
*/
static void my_hash_sort_uca_nopad(CHARSET_INFO *cs,
                                   my_uca_scanner_handler *scanner_handler,
                                   const uchar *s, size_t slen,
                                   ulong *nr1, ulong *nr2)
{
  my_uca_scanner scanner;
  int weight;
  ulong m1= *nr1, m2= *nr2;

  scanner_handler->init(&scanner, cs, &cs->uca->level[0], s, slen);

  for (weight= scanner_handler->next(&scanner);
       weight > 0;
       weight= scanner_handler->next(&scanner))
  {
    /* High byte first, then low byte (MY_HASH_ADD_16() is not used here) */
    MY_HASH_ADD(m1, m2, weight >> 8);
    MY_HASH_ADD(m1, m2, weight & 0xFF);
  }

  *nr1= m1;
  *nr2= m2;
}
/*
  Write the weights of "src" on the given level into [dst, de),
  two bytes per weight, high byte first.

  Stops when the buffer is full, when *nweights reaches zero, or when
  the scanner returns a non-positive value. *nweights is decremented
  once per weight written. If only one byte is left in the buffer,
  only the high byte of the last weight is stored.

  RETURN
    Pointer to the byte after the last byte written.
*/
static uchar *
my_strnxfrm_uca_onelevel_internal(CHARSET_INFO *cs,
                                  my_uca_scanner_handler *scanner_handler,
                                  MY_UCA_WEIGHT_LEVEL *level,
                                  uchar *dst, uchar *de, uint *nweights,
                                  const uchar *src, size_t srclen)
{
  my_uca_scanner scanner;

  DBUG_ASSERT(src || !srclen);
  scanner_handler->init(&scanner, cs, level, src, srclen);

  while (dst < de && *nweights)
  {
    int weight= scanner_handler->next(&scanner);
    if (weight <= 0)
      break;
    *dst++= weight >> 8;
    if (dst < de)
      *dst++= weight & 0xFF;
    (*nweights)--;
  }
  return dst;
}
static uchar *
my_strnxfrm_uca_padn(uchar *dst, uchar *de, uint nweights, int weight)
{
......@@ -32202,27 +31722,6 @@ my_strnxfrm_uca_pad(uchar *dst, uchar *de, int weight)
}
/*
  Produce the sort key of "src" for one level into [dst, de).

  If MY_STRXFRM_PAD_WITH_SPACE is set and both buffer space and weight
  budget remain, the key is padded with the SPACE weight of the level.
  my_strxfrm_desc_and_reverse() is then applied to the written part.

  RETURN
    Pointer to the byte after the last byte written.
*/
static uchar *
my_strnxfrm_uca_onelevel(CHARSET_INFO *cs,
                         my_uca_scanner_handler *scanner_handler,
                         MY_UCA_WEIGHT_LEVEL *level,
                         uchar *dst, uchar *de, uint nweights,
                         const uchar *src, size_t srclen, uint flags)
{
  uchar *start= dst;

  dst= my_strnxfrm_uca_onelevel_internal(cs, scanner_handler, level,
                                         dst, de, &nweights,
                                         src, srclen);
  DBUG_ASSERT(dst <= de);

  if ((flags & MY_STRXFRM_PAD_WITH_SPACE) && nweights && dst < de)
  {
    dst= my_strnxfrm_uca_padn(dst, de, nweights, my_space_weight(level));
  }
  DBUG_ASSERT(dst <= de);

  my_strxfrm_desc_and_reverse(start, dst, flags, 0);
  return dst;
}
/*
Return the minimum possible weight on a level.
*/
......@@ -32233,136 +31732,6 @@ static uint min_weight_on_level(MY_UCA_WEIGHT_LEVEL *level)
}
/*
  NO PAD variant of the one-level sort key producer.

  Same as my_strnxfrm_uca_onelevel(), except that padding (when
  MY_STRXFRM_PAD_WITH_SPACE is set) uses min_weight_on_level(level)
  instead of the SPACE weight.

  RETURN
    Pointer to the byte after the last byte written.
*/
static uchar *
my_strnxfrm_uca_nopad_onelevel(CHARSET_INFO *cs,
                               my_uca_scanner_handler *scanner_handler,
                               MY_UCA_WEIGHT_LEVEL *level,
                               uchar *dst, uchar *de, uint nweights,
                               const uchar *src, size_t srclen, uint flags)
{
  uchar *start= dst;

  dst= my_strnxfrm_uca_onelevel_internal(cs, scanner_handler, level,
                                         dst, de, &nweights,
                                         src, srclen);
  DBUG_ASSERT(dst <= de);

  /* Pad with the minimum possible weight on this level */
  if ((flags & MY_STRXFRM_PAD_WITH_SPACE) && nweights && dst < de)
  {
    dst= my_strnxfrm_uca_padn(dst, de, nweights, min_weight_on_level(level));
  }
  DBUG_ASSERT(dst <= de);

  my_strxfrm_desc_and_reverse(start, dst, flags, 0);
  return dst;
}
/*
For the given string creates its "binary image", suitable
to be used in binary comparison, i.e. in memcmp().
SYNOPSIS:
my_strnxfrm_uca()
cs Character set information
dst Where to write the image
dstlen Space available for the image, in bytes
src The source string
srclen Length of the source string, in bytes
NOTES:
In a loop, scans weights from the source string and writes
them into the binary image. In a case insensitive collation,
upper and lower cases of the same letter will produce the
same image subsequences. When we have reached the end-of-string
or found an illegal multibyte sequence, the loop stops.
It is impossible to restore the original string using its
binary image.
Binary images are used for bulk comparison purposes,
e.g. in ORDER BY, when it is more efficient to create
a binary image and use it instead of weight scanner
for the original strings for every comparison.
RETURN
Number of bytes that have been written into the binary image.
*/
/*
  Build the sort key of "src" from level[0] weights into dst[0..dstlen).
  With MY_STRXFRM_PAD_TO_MAXLEN the rest of the buffer is filled using
  the SPACE weight of level[0].

  RETURN
    Number of bytes written.
*/
static size_t
my_strnxfrm_uca(CHARSET_INFO *cs,
                my_uca_scanner_handler *scanner_handler,
                uchar *dst, size_t dstlen, uint nweights,
                const uchar *src, size_t srclen, uint flags)
{
  uchar *begin= dst;
  uchar *end= dst + dstlen;

  dst= my_strnxfrm_uca_onelevel(cs, scanner_handler, &cs->uca->level[0],
                                dst, end, nweights, src, srclen, flags);
  /*
    This can probably be changed to memset(dst, 0, end - dst),
    like my_strnxfrm_uca_multilevel() does.
  */
  if (dst < end && (flags & MY_STRXFRM_PAD_TO_MAXLEN))
  {
    dst= my_strnxfrm_uca_pad(dst, end, my_space_weight(&cs->uca->level[0]));
  }
  return dst - begin;
}
/*
  NO PAD variant: build the sort key of "src" from level[0] weights into
  dst[0..dstlen). With MY_STRXFRM_PAD_TO_MAXLEN the rest of the buffer
  is filled with zero bytes.

  RETURN
    Number of bytes written.
*/
static size_t
my_strnxfrm_uca_nopad(CHARSET_INFO *cs,
                      my_uca_scanner_handler *scanner_handler,
                      uchar *dst, size_t dstlen, uint nweights,
                      const uchar *src, size_t srclen, uint flags)
{
  uchar *begin= dst;
  uchar *end= dst + dstlen;

  dst= my_strnxfrm_uca_nopad_onelevel(cs, scanner_handler, &cs->uca->level[0],
                                      dst, end, nweights, src, srclen, flags);
  if (dst < end && (flags & MY_STRXFRM_PAD_TO_MAXLEN))
  {
    memset(dst, 0, end - dst);
    dst= end;
  }
  return dst - begin;
}
/*
  Build a multi-level sort key: the keys of the individual levels are
  written one after another into dst[0..dstlen).

  If MY_STRXFRM_LEVEL_ALL bits are set in "flags", only the levels whose
  bit (MY_STRXFRM_LEVEL1 << level) is set are produced; otherwise all
  cs->levels_for_order levels are. The per-level routine is chosen by
  the MY_CS_NOPAD state bit. With MY_STRXFRM_PAD_TO_MAXLEN the rest of
  the buffer is filled with zero bytes.

  RETURN
    Number of bytes written.
*/
static size_t
my_strnxfrm_uca_multilevel(CHARSET_INFO *cs,
                           my_uca_scanner_handler *scanner_handler,
                           uchar *dst, size_t dstlen, uint nweights,
                           const uchar *src, size_t srclen, uint flags)
{
  uchar *begin= dst;
  uchar *end= dst + dstlen;
  uint level_count= cs->levels_for_order;
  uint lvl;

  for (lvl= 0; lvl != level_count; lvl++)
  {
    /* Specific levels were requested and this one is not among them */
    if ((flags & MY_STRXFRM_LEVEL_ALL) &&
        !(flags & (MY_STRXFRM_LEVEL1 << lvl)))
      continue;

    if (cs->state & MY_CS_NOPAD)
    {
      dst= my_strnxfrm_uca_nopad_onelevel(cs, scanner_handler,
                                          &cs->uca->level[lvl],
                                          dst, end, nweights,
                                          src, srclen, flags);
    }
    else
    {
      dst= my_strnxfrm_uca_onelevel(cs, scanner_handler,
                                    &cs->uca->level[lvl],
                                    dst, end, nweights,
                                    src, srclen, flags);
    }
  }

  if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < end)
  {
    memset(dst, 0, end - dst);
    dst= end;
  }
  return dst - begin;
}
/*
This function compares if two characters are the same.
The sign +1 or -1 does not matter. The only
......@@ -34248,8 +33617,46 @@ init_weight_level(MY_CHARSET_LOADER *loader, MY_COLL_RULES *rules,
}
/* Multi-level collation handlers, declared ahead of their use in create_tailoring() */
MY_COLLATION_HANDLER my_collation_any_uca_handler_multilevel;
MY_COLLATION_HANDLER my_collation_generic_uca_nopad_handler_multilevel;
/* Forward declaration: called by my_coll_init_uca() */
static my_bool
create_tailoring(struct charset_info_st *cs,
MY_CHARSET_LOADER *loader);
/*
  Collation initializer for UCA collations.

  Sets the pad character to SPACE, takes ctype from
  my_charset_utf8_unicode_ci, installs my_unicase_default when no case
  info is set, and then calls create_tailoring().

  RETURN
    The result of create_tailoring().
*/
static my_bool
my_coll_init_uca(struct charset_info_st *cs, MY_CHARSET_LOADER *loader)
{
  my_bool rc;

  if (!cs->caseinfo)
  {
    cs->caseinfo= &my_unicase_default;
  }
  cs->ctype= my_charset_utf8_unicode_ci.ctype;
  cs->pad_char= ' ';

  rc= create_tailoring(cs, loader);
  return rc;
}
/*
  Sort key length for a one-level key of a string of "len" bytes:
  "len" is rounded up to whole mbmaxlen units, multiplied by
  strxfrm_multiply, and by 2 because UCA uses 2 bytes per weight.
*/
static size_t my_strnxfrmlen_any_uca(CHARSET_INFO *cs, size_t len)
{
  size_t units= (len + cs->mbmaxlen - 1) / cs->mbmaxlen;

  return units * cs->strxfrm_multiply * 2;
}
/*
  Sort key length for a multi-level key: the one-level length
  multiplied by cs->levels_for_order.
*/
static size_t my_strnxfrmlen_any_uca_multilevel(CHARSET_INFO *cs, size_t len)
{
  size_t one_level= my_strnxfrmlen_any_uca(cs, len);

  return one_level * cs->levels_for_order;
}
/*
Define generic collation handlers for multi-level collations with tailoring:
my_uca_collation_handler_nopad_multilevel_generic
my_uca_collation_handler_multilevel_generic
TODO: Use faster character-set specific versions of MY_COLLATION_HANDLER
instead of generic.
*/
/*
  Parameters for this inclusion of ctype-uca.ic:
    MY_FUNCTION_NAME  - gives generated identifiers the "_generic" suffix
    MY_MB_WC          - decodes a character through the character set's
                        mb_wc() callback, found via the scanner
    MY_LIKE_RANGE     - LIKE range function to use
  Macro arguments are parenthesized so that any expression can be passed.
*/
#define MY_FUNCTION_NAME(x) my_uca_ ## x ## _generic
#define MY_MB_WC(scanner, wc, beg, end) ((scanner)->cs->cset->mb_wc((scanner)->cs, (wc), (beg), (end)))
#define MY_LIKE_RANGE my_like_range_generic
#include "ctype-uca.ic"
/*
......@@ -34334,8 +33741,8 @@ create_tailoring(struct charset_info_st *cs,
cs->uca[0]= new_uca;
if (cs->levels_for_order > 1)
cs->coll= (cs->state & MY_CS_NOPAD) ?
&my_collation_generic_uca_nopad_handler_multilevel :
&my_collation_any_uca_handler_multilevel;
&my_uca_collation_handler_nopad_multilevel_generic :
&my_uca_collation_handler_multilevel_generic;
ex:
(loader->free)(rules.rule);
......@@ -34344,235 +33751,14 @@ create_tailoring(struct charset_info_st *cs,
return rc;
}
/*
Universal CHARSET_INFO compatible wrappers
for the above internal functions.
Should work for any character set.
*/
/*
  Collation initializer for UCA collations.

  Sets the pad character to SPACE, takes ctype from
  my_charset_utf8_unicode_ci, installs my_unicase_default when no case
  info is set, and then calls create_tailoring().

  RETURN
    The result of create_tailoring().
*/
static my_bool
my_coll_init_uca(struct charset_info_st *cs, MY_CHARSET_LOADER *loader)
{
  my_bool rc;

  if (!cs->caseinfo)
  {
    cs->caseinfo= &my_unicase_default;
  }
  cs->ctype= my_charset_utf8_unicode_ci.ctype;
  cs->pad_char= ' ';

  rc= create_tailoring(cs, loader);
  return rc;
}
/* my_strnncoll_uca() bound to the generic scanner handler */
static int my_strnncoll_any_uca(CHARSET_INFO *cs,
                                const uchar *s, size_t slen,
                                const uchar *t, size_t tlen,
                                my_bool t_is_prefix)
{
  int cmp;

  cmp= my_strnncoll_uca(cs, &my_any_uca_scanner_handler,
                        s, slen, t, tlen, t_is_prefix);
  return cmp;
}
/* my_strnncoll_uca_multilevel() bound to the generic scanner handler */
static int my_strnncoll_any_uca_multilevel(CHARSET_INFO *cs,
                                           const uchar *s, size_t slen,
                                           const uchar *t, size_t tlen,
                                           my_bool t_is_prefix)
{
  int cmp;

  cmp= my_strnncoll_uca_multilevel(cs, &my_any_uca_scanner_handler,
                                   s, slen, t, tlen, t_is_prefix);
  return cmp;
}
/* my_strnncollsp_uca() bound to the generic scanner handler */
static int my_strnncollsp_any_uca(CHARSET_INFO *cs,
                                  const uchar *s, size_t slen,
                                  const uchar *t, size_t tlen)
{
  int cmp;

  cmp= my_strnncollsp_uca(cs, &my_any_uca_scanner_handler,
                          s, slen, t, tlen);
  return cmp;
}
/*
  NO PAD comparison with the generic scanner handler: forwards to
  my_strnncoll_uca() with t_is_prefix = FALSE.
*/
static int my_strnncollsp_generic_uca_nopad(CHARSET_INFO *cs,
                                            const uchar *s, size_t slen,
                                            const uchar *t, size_t tlen)
{
  int cmp;

  cmp= my_strnncoll_uca(cs, &my_any_uca_scanner_handler,
                        s, slen, t, tlen, FALSE);
  return cmp;
}
/* my_strnncollsp_uca_multilevel() bound to the generic scanner handler */
static int my_strnncollsp_any_uca_multilevel(CHARSET_INFO *cs,
                                             const uchar *s, size_t slen,
                                             const uchar *t, size_t tlen)
{
  int cmp;

  cmp= my_strnncollsp_uca_multilevel(cs, &my_any_uca_scanner_handler,
                                     s, slen, t, tlen);
  return cmp;
}
/* my_hash_sort_uca() bound to the generic scanner handler */
static void my_hash_sort_any_uca(CHARSET_INFO *cs,
                                 const uchar *s, size_t slen,
                                 ulong *n1, ulong *n2)
{
  my_hash_sort_uca(cs, &my_any_uca_scanner_handler,
                   s, slen,
                   n1, n2);
}
/* my_hash_sort_uca_nopad() bound to the generic scanner handler */
static void my_hash_sort_generic_uca_nopad(CHARSET_INFO *cs,
                                           const uchar *s, size_t slen,
                                           ulong *n1, ulong *n2)
{
  my_hash_sort_uca_nopad(cs, &my_any_uca_scanner_handler,
                         s, slen,
                         n1, n2);
}
/* my_strnxfrm_uca() bound to the generic scanner handler */
static size_t my_strnxfrm_any_uca(CHARSET_INFO *cs,
                                  uchar *dst, size_t dstlen, uint nweights,
                                  const uchar *src, size_t srclen, uint flags)
{
  size_t written;

  written= my_strnxfrm_uca(cs, &my_any_uca_scanner_handler,
                           dst, dstlen, nweights, src, srclen, flags);
  return written;
}
/* my_strnxfrm_uca_nopad() bound to the generic scanner handler */
static size_t my_strnxfrm_generic_uca_nopad(CHARSET_INFO *cs,
                                            uchar *dst, size_t dstlen,
                                            uint nweights,
                                            const uchar *src, size_t srclen,
                                            uint flags)
{
  size_t written;

  written= my_strnxfrm_uca_nopad(cs, &my_any_uca_scanner_handler,
                                 dst, dstlen, nweights, src, srclen, flags);
  return written;
}
/* my_strnxfrm_uca_multilevel() bound to the generic scanner handler */
static size_t my_strnxfrm_any_uca_multilevel(CHARSET_INFO *cs,
                                             uchar *dst, size_t dstlen,
                                             uint nweights, const uchar *src,
                                             size_t srclen, uint flags)
{
  size_t written;

  written= my_strnxfrm_uca_multilevel(cs, &my_any_uca_scanner_handler,
                                      dst, dstlen, nweights, src, srclen,
                                      flags);
  return written;
}
/*
  Sort key length for a one-level key of a string of "len" bytes:
  "len" is rounded up to whole mbmaxlen units, multiplied by
  strxfrm_multiply, and by 2 because UCA uses 2 bytes per weight.
*/
static size_t my_strnxfrmlen_any_uca(CHARSET_INFO *cs, size_t len)
{
  size_t units= (len + cs->mbmaxlen - 1) / cs->mbmaxlen;

  return units * cs->strxfrm_multiply * 2;
}
/*
  Sort key length for a multi-level key: the one-level length
  multiplied by cs->levels_for_order.
*/
static size_t my_strnxfrmlen_any_uca_multilevel(CHARSET_INFO *cs, size_t len)
{
  size_t one_level= my_strnxfrmlen_any_uca(cs, len);

  return one_level * cs->levels_for_order;
}
/*
  NO PAD handler for character sets with mbminlen==1.
  Uses the generic-scanner NO PAD wrappers for comparison, strnxfrm and
  hashing, and my_like_range_mb for LIKE ranges.
*/
MY_COLLATION_HANDLER my_collation_mb_uca_nopad_handler =
{
my_coll_init_uca,
my_strnncoll_any_uca,
my_strnncollsp_generic_uca_nopad,
my_strnxfrm_generic_uca_nopad,
my_strnxfrmlen_any_uca,
my_like_range_mb,
my_wildcmp_uca,
NULL,
my_instr_mb,
my_hash_sort_generic_uca_nopad,
my_propagate_complex
};
/*
  NO PAD handler for character sets with mbminlen>=1.
  Same entries as my_collation_mb_uca_nopad_handler, except that
  my_like_range_generic is used for LIKE ranges.
*/
MY_COLLATION_HANDLER my_collation_generic_uca_nopad_handler =
{
my_coll_init_uca,
my_strnncoll_any_uca,
my_strnncollsp_generic_uca_nopad,
my_strnxfrm_generic_uca_nopad,
my_strnxfrmlen_any_uca,
my_like_range_generic,
my_wildcmp_uca,
NULL,
my_instr_mb,
my_hash_sort_generic_uca_nopad,
my_propagate_complex
};
/*
  Handler for multi-level collations, built on the generic scanner.
  Comparison, strnxfrm and strnxfrmlen use the *_multilevel wrappers;
  hashing uses my_hash_sort_any_uca.
*/
MY_COLLATION_HANDLER my_collation_any_uca_handler_multilevel=
{
my_coll_init_uca,
my_strnncoll_any_uca_multilevel,
my_strnncollsp_any_uca_multilevel,
my_strnxfrm_any_uca_multilevel,
my_strnxfrmlen_any_uca_multilevel,
my_like_range_generic,
my_wildcmp_uca,
NULL,
my_instr_mb,
my_hash_sort_any_uca,
my_propagate_complex
};
/*
  NO PAD handler for multi-level collations, built on the generic scanner.
  Differs from my_collation_any_uca_handler_multilevel in using
  my_strnncollsp_generic_uca_nopad_multilevel and
  my_hash_sort_generic_uca_nopad.
*/
MY_COLLATION_HANDLER my_collation_generic_uca_nopad_handler_multilevel =
{
my_coll_init_uca,
my_strnncoll_any_uca_multilevel,
my_strnncollsp_generic_uca_nopad_multilevel,
my_strnxfrm_any_uca_multilevel,
my_strnxfrmlen_any_uca_multilevel,
my_like_range_generic,
my_wildcmp_uca,
NULL,
my_instr_mb,
my_hash_sort_generic_uca_nopad,
my_propagate_complex
};
#ifdef HAVE_CHARSET_ucs2
/*
UCS2 optimized CHARSET_INFO compatible wrappers.
*/
/*
  ucs2 entry point for string comparison.
  Forwards to my_strnncoll_uca() with the generic scanner handler.
*/
static int my_strnncoll_ucs2_uca(CHARSET_INFO *cs,
                                 const uchar *s, size_t slen,
                                 const uchar *t, size_t tlen,
                                 my_bool t_is_prefix)
{
  int cmp;

  cmp= my_strnncoll_uca(cs, &my_any_uca_scanner_handler,
                        s, slen, t, tlen, t_is_prefix);
  return cmp;
}
/*
  ucs2 entry point for space-padded comparison.
  Forwards to my_strnncollsp_uca() with the generic scanner handler.
*/
static int my_strnncollsp_ucs2_uca(CHARSET_INFO *cs,
                                   const uchar *s, size_t slen,
                                   const uchar *t, size_t tlen)
{
  int cmp;

  cmp= my_strnncollsp_uca(cs, &my_any_uca_scanner_handler,
                          s, slen, t, tlen);
  return cmp;
}
/*
  ucs2 entry point for hashing.
  Forwards to my_hash_sort_uca() with the generic scanner handler.
*/
static void my_hash_sort_ucs2_uca(CHARSET_INFO *cs,
                                  const uchar *s, size_t slen,
                                  ulong *n1, ulong *n2)
{
  my_hash_sort_uca(cs, &my_any_uca_scanner_handler,
                   s, slen,
                   n1, n2);
}
/*
  ucs2 entry point for sort key creation.
  Forwards to my_strnxfrm_uca() with the generic scanner handler.
*/
static size_t my_strnxfrm_ucs2_uca(CHARSET_INFO *cs,
                                   uchar *dst, size_t dstlen, uint nweights,
                                   const uchar *src, size_t srclen, uint flags)
{
  size_t written;

  written= my_strnxfrm_uca(cs, &my_any_uca_scanner_handler,
                           dst, dstlen, nweights, src, srclen, flags);
  return written;
}
/*
  Collation handler for one-level ucs2 UCA collations, wired to the
  my_*_ucs2_uca wrappers for comparison, strnxfrm and hashing.
*/
MY_COLLATION_HANDLER my_collation_ucs2_uca_handler =
{
my_coll_init_uca, /* init */
my_strnncoll_ucs2_uca,
my_strnncollsp_ucs2_uca,
my_strnxfrm_ucs2_uca,
my_strnxfrmlen_any_uca,
my_like_range_generic,
my_wildcmp_uca,
NULL,
my_instr_mb,
my_hash_sort_ucs2_uca,
my_propagate_complex
};
/*
  Parameters for the ucs2 inclusion of ctype-uca.ic: generated
  identifiers get the "_ucs2" suffix and characters are decoded with
  my_mb_wc_ucs2_quick() (the scanner argument is not used by MY_MB_WC).
  NOTE(review): my_mb_wc_ucs2_quick() presumably comes from
  ctype-ucs2.h - confirm.
*/
#include "ctype-ucs2.h"
#define MY_FUNCTION_NAME(x) my_uca_ ## x ## _ucs2
#define MY_MB_WC(scanner, wc, beg, end) (my_mb_wc_ucs2_quick(wc, beg, end))
#define MY_LIKE_RANGE my_like_range_generic
#include "ctype-uca.ic"
/* State flags for ucs2 UCA collations: the common UCA flags plus MY_CS_NONASCII */
#define MY_CS_UCS2_UCA_FLAGS (MY_CS_COMMON_UCA_FLAGS|MY_CS_NONASCII)
......@@ -34607,7 +33793,7 @@ struct charset_info_st my_charset_ucs2_unicode_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
&my_collation_ucs2_uca_handler
&my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_icelandic_uca_ci=
......@@ -34639,7 +33825,7 @@ struct charset_info_st my_charset_ucs2_icelandic_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
&my_collation_ucs2_uca_handler
&my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_latvian_uca_ci=
......@@ -34671,7 +33857,7 @@ struct charset_info_st my_charset_ucs2_latvian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
&my_collation_ucs2_uca_handler
&my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_romanian_uca_ci=
......@@ -34703,7 +33889,7 @@ struct charset_info_st my_charset_ucs2_romanian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
&my_collation_ucs2_uca_handler
&my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_slovenian_uca_ci=
......@@ -34735,7 +33921,7 @@ struct charset_info_st my_charset_ucs2_slovenian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
&my_collation_ucs2_uca_handler
&my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_polish_uca_ci=
......@@ -34767,7 +33953,7 @@ struct charset_info_st my_charset_ucs2_polish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
&my_collation_ucs2_uca_handler
&my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_estonian_uca_ci=
......@@ -34799,7 +33985,7 @@ struct charset_info_st my_charset_ucs2_estonian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
&my_collation_ucs2_uca_handler
&my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_spanish_uca_ci=
......@@ -34831,7 +34017,7 @@ struct charset_info_st my_charset_ucs2_spanish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
&my_collation_ucs2_uca_handler
&my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_swedish_uca_ci=
......@@ -34863,7 +34049,7 @@ struct charset_info_st my_charset_ucs2_swedish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
&my_collation_ucs2_uca_handler
&my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_turkish_uca_ci=
......@@ -34895,7 +34081,7 @@ struct charset_info_st my_charset_ucs2_turkish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
&my_collation_ucs2_uca_handler
&my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_czech_uca_ci=
......@@ -34927,7 +34113,7 @@ struct charset_info_st my_charset_ucs2_czech_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
&my_collation_ucs2_uca_handler
&my_uca_collation_handler_ucs2
};
......@@ -34960,7 +34146,7 @@ struct charset_info_st my_charset_ucs2_danish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
&my_collation_ucs2_uca_handler
&my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_lithuanian_uca_ci=
......@@ -34992,7 +34178,7 @@ struct charset_info_st my_charset_ucs2_lithuanian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
&my_collation_ucs2_uca_handler
&my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_slovak_uca_ci=
......@@ -35024,7 +34210,7 @@ struct charset_info_st my_charset_ucs2_slovak_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
&my_collation_ucs2_uca_handler
&my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_spanish2_uca_ci=
......@@ -35056,7 +34242,7 @@ struct charset_info_st my_charset_ucs2_spanish2_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
&my_collation_ucs2_uca_handler
&my_uca_collation_handler_ucs2
};
......@@ -35089,7 +34275,7 @@ struct charset_info_st my_charset_ucs2_roman_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
&my_collation_ucs2_uca_handler
&my_uca_collation_handler_ucs2
};
......@@ -35122,7 +34308,7 @@ struct charset_info_st my_charset_ucs2_persian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
&my_collation_ucs2_uca_handler
&my_uca_collation_handler_ucs2
};
......@@ -35155,7 +34341,7 @@ struct charset_info_st my_charset_ucs2_esperanto_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
&my_collation_ucs2_uca_handler
&my_uca_collation_handler_ucs2
};
......@@ -35188,7 +34374,7 @@ struct charset_info_st my_charset_ucs2_hungarian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
&my_collation_ucs2_uca_handler
&my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_sinhala_uca_ci=
......@@ -35220,7 +34406,7 @@ struct charset_info_st my_charset_ucs2_sinhala_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
&my_collation_ucs2_uca_handler
&my_uca_collation_handler_ucs2
};
......@@ -35254,7 +34440,7 @@ struct charset_info_st my_charset_ucs2_german2_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
&my_collation_ucs2_uca_handler
&my_uca_collation_handler_ucs2
};
struct charset_info_st my_charset_ucs2_croatian_mysql561_uca_ci=
......@@ -35286,7 +34472,7 @@ struct charset_info_st my_charset_ucs2_croatian_mysql561_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
&my_collation_ucs2_uca_handler
&my_uca_collation_handler_ucs2
};
......@@ -35319,7 +34505,7 @@ struct charset_info_st my_charset_ucs2_croatian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
&my_collation_ucs2_uca_handler
&my_uca_collation_handler_ucs2
};
......@@ -35352,7 +34538,7 @@ struct charset_info_st my_charset_ucs2_myanmar_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
&my_collation_ucs2_uca_handler
&my_uca_collation_handler_ucs2
};
......@@ -35385,7 +34571,7 @@ struct charset_info_st my_charset_ucs2_thai_520_w2=
0, /* escape_with_backslash_is_dangerous */
2, /* levels_for_order */
&my_charset_ucs2_handler,
&my_collation_any_uca_handler_multilevel
&my_uca_collation_handler_multilevel_ucs2
};
struct charset_info_st my_charset_ucs2_unicode_520_ci=
......@@ -35417,7 +34603,7 @@ struct charset_info_st my_charset_ucs2_unicode_520_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
&my_collation_ucs2_uca_handler
&my_uca_collation_handler_ucs2
};
......@@ -35450,7 +34636,7 @@ struct charset_info_st my_charset_ucs2_vietnamese_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
&my_collation_ucs2_uca_handler
&my_uca_collation_handler_ucs2
};
......@@ -35483,7 +34669,7 @@ struct charset_info_st my_charset_ucs2_unicode_nopad_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
&my_collation_generic_uca_nopad_handler
&my_uca_collation_handler_nopad_ucs2
};
......@@ -35516,7 +34702,7 @@ struct charset_info_st my_charset_ucs2_unicode_520_nopad_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
&my_collation_generic_uca_nopad_handler
&my_uca_collation_handler_nopad_ucs2
};
......@@ -35524,20 +34710,12 @@ struct charset_info_st my_charset_ucs2_unicode_520_nopad_ci=
#ifdef HAVE_CHARSET_utf8
/*
  Collation handler table assembled from the generic "any" UCA routines
  (my_strnncoll_any_uca, my_strnncollsp_any_uca, my_strnxfrm_any_uca,
  my_strnxfrmlen_any_uca, my_hash_sort_any_uca), together with
  my_like_range_mb, my_wildcmp_uca, my_instr_mb and my_propagate_complex.

  Only the first entry is labelled below. The role of every other position
  is fixed by the member order of MY_COLLATION_HANDLER, which is declared
  outside this part of the file.

  NOTE(review): the utf8 charset definitions that follow list this name
  next to my_uca_collation_handler_utf8mb3 -- confirm which one is current.
*/
MY_COLLATION_HANDLER my_collation_any_uca_handler =
{
my_coll_init_uca, /* init */
my_strnncoll_any_uca,
my_strnncollsp_any_uca,
my_strnxfrm_any_uca,
my_strnxfrmlen_any_uca,
my_like_range_mb,
my_wildcmp_uca,
NULL, /* no function is supplied for this slot */
my_instr_mb,
my_hash_sort_any_uca,
my_propagate_complex
};
/*
  Parameterize and include the shared UCA template for utf8mb3.

  MY_FUNCTION_NAME(x) pastes its argument between "my_uca_" and "_utf8mb3",
  e.g. MY_FUNCTION_NAME(foo) becomes my_uca_foo_utf8mb3.
  MY_MB_WC() ignores its "scanner" argument and forwards (wc, beg, end)
  to my_mb_wc_utf8mb3_quick(), presumably provided by ctype-utf8.h.
  MY_LIKE_RANGE selects my_like_range_mb.

  The macros must be defined before ctype-uca.ic is included. The utf8
  charset definitions below reference my_uca_collation_handler_utf8mb3,
  my_uca_collation_handler_nopad_utf8mb3 and
  my_uca_collation_handler_multilevel_utf8mb3, which follow this naming
  pattern; presumably the template defines them -- TODO confirm.

  NOTE(review): the same three macros are defined again for other character
  sets in this file with no #undef in between; presumably ctype-uca.ic
  undefines them at its end -- confirm.
*/
#include "ctype-utf8.h"
#define MY_FUNCTION_NAME(x) my_uca_ ## x ## _utf8mb3
#define MY_MB_WC(scanner, wc, beg, end) (my_mb_wc_utf8mb3_quick(wc, beg, end))
#define MY_LIKE_RANGE my_like_range_mb
#include "ctype-uca.ic"
/*
......@@ -35600,7 +34778,7 @@ struct charset_info_st my_charset_utf8_unicode_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb3
};
......@@ -35633,7 +34811,7 @@ struct charset_info_st my_charset_utf8_icelandic_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_latvian_uca_ci=
......@@ -35665,7 +34843,7 @@ struct charset_info_st my_charset_utf8_latvian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_romanian_uca_ci=
......@@ -35697,7 +34875,7 @@ struct charset_info_st my_charset_utf8_romanian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_slovenian_uca_ci=
......@@ -35729,7 +34907,7 @@ struct charset_info_st my_charset_utf8_slovenian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_polish_uca_ci=
......@@ -35761,7 +34939,7 @@ struct charset_info_st my_charset_utf8_polish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_estonian_uca_ci=
......@@ -35793,7 +34971,7 @@ struct charset_info_st my_charset_utf8_estonian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_spanish_uca_ci=
......@@ -35825,7 +35003,7 @@ struct charset_info_st my_charset_utf8_spanish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_swedish_uca_ci=
......@@ -35857,7 +35035,7 @@ struct charset_info_st my_charset_utf8_swedish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_turkish_uca_ci=
......@@ -35889,7 +35067,7 @@ struct charset_info_st my_charset_utf8_turkish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_czech_uca_ci=
......@@ -35921,7 +35099,7 @@ struct charset_info_st my_charset_utf8_czech_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb3
};
......@@ -35954,7 +35132,7 @@ struct charset_info_st my_charset_utf8_danish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_lithuanian_uca_ci=
......@@ -35986,7 +35164,7 @@ struct charset_info_st my_charset_utf8_lithuanian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_slovak_uca_ci=
......@@ -36018,7 +35196,7 @@ struct charset_info_st my_charset_utf8_slovak_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_spanish2_uca_ci=
......@@ -36050,7 +35228,7 @@ struct charset_info_st my_charset_utf8_spanish2_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_roman_uca_ci=
......@@ -36082,7 +35260,7 @@ struct charset_info_st my_charset_utf8_roman_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_persian_uca_ci=
......@@ -36114,7 +35292,7 @@ struct charset_info_st my_charset_utf8_persian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_esperanto_uca_ci=
......@@ -36146,7 +35324,7 @@ struct charset_info_st my_charset_utf8_esperanto_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_hungarian_uca_ci=
......@@ -36178,7 +35356,7 @@ struct charset_info_st my_charset_utf8_hungarian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_sinhala_uca_ci=
......@@ -36210,7 +35388,7 @@ struct charset_info_st my_charset_utf8_sinhala_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb3
};
......@@ -36243,7 +35421,7 @@ struct charset_info_st my_charset_utf8_german2_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_croatian_mysql561_uca_ci=
......@@ -36275,7 +35453,7 @@ struct charset_info_st my_charset_utf8_croatian_mysql561_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb3
};
......@@ -36308,7 +35486,7 @@ struct charset_info_st my_charset_utf8_croatian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb3
};
......@@ -36341,7 +35519,7 @@ struct charset_info_st my_charset_utf8_myanmar_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb3
};
......@@ -36374,7 +35552,7 @@ struct charset_info_st my_charset_utf8_unicode_520_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb3
};
struct charset_info_st my_charset_utf8_thai_520_w2=
......@@ -36406,7 +35584,7 @@ struct charset_info_st my_charset_utf8_thai_520_w2=
0, /* escape_with_backslash_is_dangerous */
2, /* levels_for_order */
&my_charset_utf8_handler,
&my_collation_any_uca_handler_multilevel
&my_uca_collation_handler_multilevel_utf8mb3
};
struct charset_info_st my_charset_utf8_vietnamese_ci=
......@@ -36438,7 +35616,7 @@ struct charset_info_st my_charset_utf8_vietnamese_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb3
};
......@@ -36471,7 +35649,7 @@ struct charset_info_st my_charset_utf8_unicode_nopad_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
&my_collation_mb_uca_nopad_handler
&my_uca_collation_handler_nopad_utf8mb3
};
......@@ -36504,7 +35682,7 @@ struct charset_info_st my_charset_utf8_unicode_520_nopad_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8_handler,
&my_collation_mb_uca_nopad_handler
&my_uca_collation_handler_nopad_utf8mb3
};
#endif /* HAVE_CHARSET_utf8 */
......@@ -36512,6 +35690,12 @@ struct charset_info_st my_charset_utf8_unicode_520_nopad_ci=
#ifdef HAVE_CHARSET_utf8mb4
/*
  Parameterize and include the shared UCA template for utf8mb4.

  MY_FUNCTION_NAME(x) pastes its argument between "my_uca_" and "_utf8mb4".
  MY_MB_WC() ignores its "scanner" argument and forwards (wc, beg, end)
  to my_mb_wc_utf8mb4_quick().
  MY_LIKE_RANGE selects my_like_range_mb.

  No header is included at this point for my_mb_wc_utf8mb4_quick();
  presumably it comes from a header included earlier in the file -- confirm.

  The utf8mb4 charset definitions below reference
  my_uca_collation_handler_utf8mb4, my_uca_collation_handler_nopad_utf8mb4
  and my_uca_collation_handler_multilevel_utf8mb4, which follow this naming
  pattern; presumably the template defines them -- TODO confirm.
*/
#define MY_FUNCTION_NAME(x) my_uca_ ## x ## _utf8mb4
#define MY_MB_WC(scanner, wc, beg, end) (my_mb_wc_utf8mb4_quick(wc, beg, end))
#define MY_LIKE_RANGE my_like_range_mb
#include "ctype-uca.ic"
/* Character-set handler used by the utf8mb4 charset definitions below. */
extern MY_CHARSET_HANDLER my_charset_utf8mb4_handler;
/* Common UCA flags with MY_CS_UNICODE_SUPPLEMENT OR-ed in. */
#define MY_CS_UTF8MB4_UCA_FLAGS (MY_CS_COMMON_UCA_FLAGS|MY_CS_UNICODE_SUPPLEMENT)
......@@ -36546,7 +35730,7 @@ struct charset_info_st my_charset_utf8mb4_unicode_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb4
};
......@@ -36579,7 +35763,7 @@ struct charset_info_st my_charset_utf8mb4_icelandic_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_latvian_uca_ci=
......@@ -36611,7 +35795,7 @@ struct charset_info_st my_charset_utf8mb4_latvian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_romanian_uca_ci=
......@@ -36643,7 +35827,7 @@ struct charset_info_st my_charset_utf8mb4_romanian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_slovenian_uca_ci=
......@@ -36675,7 +35859,7 @@ struct charset_info_st my_charset_utf8mb4_slovenian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_polish_uca_ci=
......@@ -36707,7 +35891,7 @@ struct charset_info_st my_charset_utf8mb4_polish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_estonian_uca_ci=
......@@ -36739,7 +35923,7 @@ struct charset_info_st my_charset_utf8mb4_estonian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_spanish_uca_ci=
......@@ -36771,7 +35955,7 @@ struct charset_info_st my_charset_utf8mb4_spanish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_swedish_uca_ci=
......@@ -36803,7 +35987,7 @@ struct charset_info_st my_charset_utf8mb4_swedish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_turkish_uca_ci=
......@@ -36835,7 +36019,7 @@ struct charset_info_st my_charset_utf8mb4_turkish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_czech_uca_ci=
......@@ -36867,7 +36051,7 @@ struct charset_info_st my_charset_utf8mb4_czech_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb4
};
......@@ -36900,7 +36084,7 @@ struct charset_info_st my_charset_utf8mb4_danish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_lithuanian_uca_ci=
......@@ -36932,7 +36116,7 @@ struct charset_info_st my_charset_utf8mb4_lithuanian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_slovak_uca_ci=
......@@ -36964,7 +36148,7 @@ struct charset_info_st my_charset_utf8mb4_slovak_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_spanish2_uca_ci=
......@@ -36996,7 +36180,7 @@ struct charset_info_st my_charset_utf8mb4_spanish2_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_roman_uca_ci=
......@@ -37028,7 +36212,7 @@ struct charset_info_st my_charset_utf8mb4_roman_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_persian_uca_ci=
......@@ -37060,7 +36244,7 @@ struct charset_info_st my_charset_utf8mb4_persian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_esperanto_uca_ci=
......@@ -37092,7 +36276,7 @@ struct charset_info_st my_charset_utf8mb4_esperanto_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_hungarian_uca_ci=
......@@ -37124,7 +36308,7 @@ struct charset_info_st my_charset_utf8mb4_hungarian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_sinhala_uca_ci=
......@@ -37156,7 +36340,7 @@ struct charset_info_st my_charset_utf8mb4_sinhala_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_german2_uca_ci=
......@@ -37188,7 +36372,7 @@ struct charset_info_st my_charset_utf8mb4_german2_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_croatian_mysql561_uca_ci=
......@@ -37220,7 +36404,7 @@ struct charset_info_st my_charset_utf8mb4_croatian_mysql561_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb4
};
......@@ -37253,7 +36437,7 @@ struct charset_info_st my_charset_utf8mb4_croatian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb4
};
......@@ -37286,7 +36470,7 @@ struct charset_info_st my_charset_utf8mb4_myanmar_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_thai_520_w2=
......@@ -37318,7 +36502,7 @@ struct charset_info_st my_charset_utf8mb4_thai_520_w2=
0, /* escape_with_backslash_is_dangerous */
2, /* levels_for_order */
&my_charset_utf8mb4_handler,
&my_collation_any_uca_handler_multilevel
&my_uca_collation_handler_multilevel_utf8mb4
};
struct charset_info_st my_charset_utf8mb4_unicode_520_ci=
......@@ -37350,7 +36534,7 @@ struct charset_info_st my_charset_utf8mb4_unicode_520_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb4
};
......@@ -37383,7 +36567,7 @@ struct charset_info_st my_charset_utf8mb4_vietnamese_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
&my_collation_any_uca_handler
&my_uca_collation_handler_utf8mb4
};
......@@ -37416,7 +36600,7 @@ struct charset_info_st my_charset_utf8mb4_unicode_nopad_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
&my_collation_mb_uca_nopad_handler
&my_uca_collation_handler_nopad_utf8mb4
};
......@@ -37449,7 +36633,7 @@ struct charset_info_st my_charset_utf8mb4_unicode_520_nopad_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf8mb4_handler,
&my_collation_mb_uca_nopad_handler
&my_uca_collation_handler_nopad_utf8mb4
};
......@@ -37458,20 +36642,11 @@ struct charset_info_st my_charset_utf8mb4_unicode_520_nopad_ci=
#ifdef HAVE_CHARSET_utf32
/*
  Collation handler table for utf32 UCA collations. It uses the generic
  "any" UCA routines and differs from my_collation_any_uca_handler only in
  its sixth entry: my_like_range_generic instead of my_like_range_mb.

  Only the first entry is labelled below. The role of every other position
  is fixed by the member order of MY_COLLATION_HANDLER, which is declared
  outside this part of the file.

  NOTE(review): the utf32 charset definitions that follow list this name
  next to my_uca_collation_handler_utf32 -- confirm which one is current.
*/
MY_COLLATION_HANDLER my_collation_utf32_uca_handler =
{
my_coll_init_uca, /* init */
my_strnncoll_any_uca,
my_strnncollsp_any_uca,
my_strnxfrm_any_uca,
my_strnxfrmlen_any_uca,
my_like_range_generic,
my_wildcmp_uca,
NULL, /* no function is supplied for this slot */
my_instr_mb,
my_hash_sort_any_uca,
my_propagate_complex
};
/*
  Parameterize and include the shared UCA template for utf32.

  MY_FUNCTION_NAME(x) pastes its argument between "my_uca_" and "_utf32".
  MY_MB_WC() ignores its "scanner" argument and forwards (wc, beg, end)
  to my_mb_wc_utf32_quick(), presumably provided by ctype-utf32.h.
  MY_LIKE_RANGE selects my_like_range_generic (the utf8 variants use
  my_like_range_mb instead).

  The utf32 charset definitions below reference
  my_uca_collation_handler_utf32, my_uca_collation_handler_nopad_utf32 and
  my_uca_collation_handler_multilevel_utf32, which follow this naming
  pattern; presumably the template defines them -- TODO confirm.
*/
#include "ctype-utf32.h"
#define MY_FUNCTION_NAME(x) my_uca_ ## x ## _utf32
#define MY_MB_WC(scanner, wc, beg, end) (my_mb_wc_utf32_quick(wc, beg, end))
#define MY_LIKE_RANGE my_like_range_generic
#include "ctype-uca.ic"
/* Character-set handler used by the utf32 charset definitions below. */
extern MY_CHARSET_HANDLER my_charset_utf32_handler;
......@@ -37508,7 +36683,7 @@ struct charset_info_st my_charset_utf32_unicode_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
&my_collation_utf32_uca_handler
&my_uca_collation_handler_utf32
};
......@@ -37541,7 +36716,7 @@ struct charset_info_st my_charset_utf32_icelandic_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
&my_collation_utf32_uca_handler
&my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_latvian_uca_ci=
......@@ -37573,7 +36748,7 @@ struct charset_info_st my_charset_utf32_latvian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
&my_collation_utf32_uca_handler
&my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_romanian_uca_ci=
......@@ -37605,7 +36780,7 @@ struct charset_info_st my_charset_utf32_romanian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
&my_collation_utf32_uca_handler
&my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_slovenian_uca_ci=
......@@ -37637,7 +36812,7 @@ struct charset_info_st my_charset_utf32_slovenian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
&my_collation_utf32_uca_handler
&my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_polish_uca_ci=
......@@ -37669,7 +36844,7 @@ struct charset_info_st my_charset_utf32_polish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
&my_collation_utf32_uca_handler
&my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_estonian_uca_ci=
......@@ -37701,7 +36876,7 @@ struct charset_info_st my_charset_utf32_estonian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
&my_collation_utf32_uca_handler
&my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_spanish_uca_ci=
......@@ -37733,7 +36908,7 @@ struct charset_info_st my_charset_utf32_spanish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
&my_collation_utf32_uca_handler
&my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_swedish_uca_ci=
......@@ -37765,7 +36940,7 @@ struct charset_info_st my_charset_utf32_swedish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
&my_collation_utf32_uca_handler
&my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_turkish_uca_ci=
......@@ -37797,7 +36972,7 @@ struct charset_info_st my_charset_utf32_turkish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
&my_collation_utf32_uca_handler
&my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_czech_uca_ci=
......@@ -37829,7 +37004,7 @@ struct charset_info_st my_charset_utf32_czech_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
&my_collation_utf32_uca_handler
&my_uca_collation_handler_utf32
};
......@@ -37862,7 +37037,7 @@ struct charset_info_st my_charset_utf32_danish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
&my_collation_utf32_uca_handler
&my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_lithuanian_uca_ci=
......@@ -37894,7 +37069,7 @@ struct charset_info_st my_charset_utf32_lithuanian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
&my_collation_utf32_uca_handler
&my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_slovak_uca_ci=
......@@ -37926,7 +37101,7 @@ struct charset_info_st my_charset_utf32_slovak_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
&my_collation_utf32_uca_handler
&my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_spanish2_uca_ci=
......@@ -37958,7 +37133,7 @@ struct charset_info_st my_charset_utf32_spanish2_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
&my_collation_utf32_uca_handler
&my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_roman_uca_ci=
......@@ -37990,7 +37165,7 @@ struct charset_info_st my_charset_utf32_roman_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
&my_collation_utf32_uca_handler
&my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_persian_uca_ci=
......@@ -38022,7 +37197,7 @@ struct charset_info_st my_charset_utf32_persian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
&my_collation_utf32_uca_handler
&my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_esperanto_uca_ci=
......@@ -38054,7 +37229,7 @@ struct charset_info_st my_charset_utf32_esperanto_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
&my_collation_utf32_uca_handler
&my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_hungarian_uca_ci=
......@@ -38086,7 +37261,7 @@ struct charset_info_st my_charset_utf32_hungarian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
&my_collation_utf32_uca_handler
&my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_sinhala_uca_ci=
......@@ -38118,7 +37293,7 @@ struct charset_info_st my_charset_utf32_sinhala_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
&my_collation_utf32_uca_handler
&my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_german2_uca_ci=
......@@ -38150,7 +37325,7 @@ struct charset_info_st my_charset_utf32_german2_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
&my_collation_utf32_uca_handler
&my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_croatian_mysql561_uca_ci=
......@@ -38182,7 +37357,7 @@ struct charset_info_st my_charset_utf32_croatian_mysql561_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
&my_collation_utf32_uca_handler
&my_uca_collation_handler_utf32
};
struct charset_info_st my_charset_utf32_croatian_uca_ci=
......@@ -38214,7 +37389,7 @@ struct charset_info_st my_charset_utf32_croatian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
&my_collation_utf32_uca_handler
&my_uca_collation_handler_utf32
};
......@@ -38247,7 +37422,7 @@ struct charset_info_st my_charset_utf32_myanmar_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
&my_collation_utf32_uca_handler
&my_uca_collation_handler_utf32
};
......@@ -38280,7 +37455,7 @@ struct charset_info_st my_charset_utf32_thai_520_w2=
0, /* escape_with_backslash_is_dangerous */
2, /* levels_for_order */
&my_charset_utf32_handler,
&my_collation_any_uca_handler_multilevel
&my_uca_collation_handler_multilevel_utf32
};
......@@ -38313,7 +37488,7 @@ struct charset_info_st my_charset_utf32_unicode_520_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
&my_collation_utf32_uca_handler
&my_uca_collation_handler_utf32
};
......@@ -38346,7 +37521,7 @@ struct charset_info_st my_charset_utf32_vietnamese_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
&my_collation_utf32_uca_handler
&my_uca_collation_handler_utf32
};
......@@ -38379,7 +37554,7 @@ struct charset_info_st my_charset_utf32_unicode_nopad_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
&my_collation_generic_uca_nopad_handler
&my_uca_collation_handler_nopad_utf32
};
......@@ -38412,7 +37587,7 @@ struct charset_info_st my_charset_utf32_unicode_520_nopad_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf32_handler,
&my_collation_generic_uca_nopad_handler
&my_uca_collation_handler_nopad_utf32
};
......@@ -38422,21 +37597,11 @@ struct charset_info_st my_charset_utf32_unicode_520_nopad_ci=
#ifdef HAVE_CHARSET_utf16
/*
  Collation handler table for utf16 UCA collations. Its entries are the
  same as those of my_collation_utf32_uca_handler: the generic "any" UCA
  routines with my_like_range_generic as the sixth entry.

  Only the first entry is labelled below. The role of every other position
  is fixed by the member order of MY_COLLATION_HANDLER, which is declared
  outside this part of the file.

  NOTE(review): the utf16 charset definitions that follow list this name
  next to my_uca_collation_handler_utf16 -- confirm which one is current.
*/
MY_COLLATION_HANDLER my_collation_utf16_uca_handler =
{
my_coll_init_uca, /* init */
my_strnncoll_any_uca,
my_strnncollsp_any_uca,
my_strnxfrm_any_uca,
my_strnxfrmlen_any_uca,
my_like_range_generic,
my_wildcmp_uca,
NULL, /* no function is supplied for this slot */
my_instr_mb,
my_hash_sort_any_uca,
my_propagate_complex
};
/*
  Parameterize and include the shared UCA template for utf16.

  MY_FUNCTION_NAME(x) pastes its argument between "my_uca_" and "_utf16".
  MY_MB_WC() ignores its "scanner" argument and forwards (wc, beg, end)
  to my_mb_wc_utf16_quick(), presumably provided by ctype-utf16.h.
  MY_LIKE_RANGE selects my_like_range_generic.

  The utf16 charset definitions below reference
  my_uca_collation_handler_utf16, which follows this naming pattern;
  presumably the template defines it -- TODO confirm.
*/
#include "ctype-utf16.h"
#define MY_FUNCTION_NAME(x) my_uca_ ## x ## _utf16
#define MY_MB_WC(scanner, wc, beg, end) (my_mb_wc_utf16_quick(wc, beg, end))
#define MY_LIKE_RANGE my_like_range_generic
#include "ctype-uca.ic"
/* Character-set handler used by the utf16 charset definitions below. */
extern MY_CHARSET_HANDLER my_charset_utf16_handler;
......@@ -38473,7 +37638,7 @@ struct charset_info_st my_charset_utf16_unicode_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
&my_collation_utf16_uca_handler
&my_uca_collation_handler_utf16
};
......@@ -38506,7 +37671,7 @@ struct charset_info_st my_charset_utf16_icelandic_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
&my_collation_utf16_uca_handler
&my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_latvian_uca_ci=
......@@ -38538,7 +37703,7 @@ struct charset_info_st my_charset_utf16_latvian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
&my_collation_utf16_uca_handler
&my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_romanian_uca_ci=
......@@ -38570,7 +37735,7 @@ struct charset_info_st my_charset_utf16_romanian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
&my_collation_utf16_uca_handler
&my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_slovenian_uca_ci=
......@@ -38602,7 +37767,7 @@ struct charset_info_st my_charset_utf16_slovenian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
&my_collation_utf16_uca_handler
&my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_polish_uca_ci=
......@@ -38634,7 +37799,7 @@ struct charset_info_st my_charset_utf16_polish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
&my_collation_utf16_uca_handler
&my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_estonian_uca_ci=
......@@ -38666,7 +37831,7 @@ struct charset_info_st my_charset_utf16_estonian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
&my_collation_utf16_uca_handler
&my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_spanish_uca_ci=
......@@ -38698,7 +37863,7 @@ struct charset_info_st my_charset_utf16_spanish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
&my_collation_utf16_uca_handler
&my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_swedish_uca_ci=
......@@ -38730,7 +37895,7 @@ struct charset_info_st my_charset_utf16_swedish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
&my_collation_utf16_uca_handler
&my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_turkish_uca_ci=
......@@ -38762,7 +37927,7 @@ struct charset_info_st my_charset_utf16_turkish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
&my_collation_utf16_uca_handler
&my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_czech_uca_ci=
......@@ -38794,7 +37959,7 @@ struct charset_info_st my_charset_utf16_czech_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
&my_collation_utf16_uca_handler
&my_uca_collation_handler_utf16
};
......@@ -38827,7 +37992,7 @@ struct charset_info_st my_charset_utf16_danish_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
&my_collation_utf16_uca_handler
&my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_lithuanian_uca_ci=
......@@ -38859,7 +38024,7 @@ struct charset_info_st my_charset_utf16_lithuanian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
&my_collation_utf16_uca_handler
&my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_slovak_uca_ci=
......@@ -38891,7 +38056,7 @@ struct charset_info_st my_charset_utf16_slovak_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
&my_collation_utf16_uca_handler
&my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_spanish2_uca_ci=
......@@ -38923,7 +38088,7 @@ struct charset_info_st my_charset_utf16_spanish2_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
&my_collation_utf16_uca_handler
&my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_roman_uca_ci=
......@@ -38955,7 +38120,7 @@ struct charset_info_st my_charset_utf16_roman_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
&my_collation_utf16_uca_handler
&my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_persian_uca_ci=
......@@ -38987,7 +38152,7 @@ struct charset_info_st my_charset_utf16_persian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
&my_collation_utf16_uca_handler
&my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_esperanto_uca_ci=
......@@ -39019,7 +38184,7 @@ struct charset_info_st my_charset_utf16_esperanto_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
&my_collation_utf16_uca_handler
&my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_hungarian_uca_ci=
......@@ -39051,7 +38216,7 @@ struct charset_info_st my_charset_utf16_hungarian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
&my_collation_utf16_uca_handler
&my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_sinhala_uca_ci=
......@@ -39083,7 +38248,7 @@ struct charset_info_st my_charset_utf16_sinhala_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
&my_collation_utf16_uca_handler
&my_uca_collation_handler_utf16
};
struct charset_info_st my_charset_utf16_german2_uca_ci=
......@@ -39115,7 +38280,7 @@ struct charset_info_st my_charset_utf16_german2_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
&my_collation_utf16_uca_handler
&my_uca_collation_handler_utf16
};
......@@ -39148,7 +38313,7 @@ struct charset_info_st my_charset_utf16_croatian_mysql561_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
&my_collation_utf16_uca_handler
&my_uca_collation_handler_utf16
};
......@@ -39181,7 +38346,7 @@ struct charset_info_st my_charset_utf16_croatian_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
&my_collation_utf16_uca_handler
&my_uca_collation_handler_utf16
};
......@@ -39214,7 +38379,7 @@ struct charset_info_st my_charset_utf16_myanmar_uca_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
&my_collation_utf16_uca_handler
&my_uca_collation_handler_utf16
};
......@@ -39247,7 +38412,7 @@ struct charset_info_st my_charset_utf16_thai_520_w2=
0, /* escape_with_backslash_is_dangerous */
2, /* levels_for_order */
&my_charset_utf16_handler,
&my_collation_any_uca_handler_multilevel
&my_uca_collation_handler_multilevel_utf16
};
......@@ -39280,7 +38445,7 @@ struct charset_info_st my_charset_utf16_unicode_520_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
&my_collation_utf16_uca_handler
&my_uca_collation_handler_utf16
};
......@@ -39313,7 +38478,7 @@ struct charset_info_st my_charset_utf16_vietnamese_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
&my_collation_utf16_uca_handler
&my_uca_collation_handler_utf16
};
......@@ -39346,7 +38511,7 @@ struct charset_info_st my_charset_utf16_unicode_nopad_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
&my_collation_generic_uca_nopad_handler
&my_uca_collation_handler_nopad_utf16
};
......@@ -39379,7 +38544,7 @@ struct charset_info_st my_charset_utf16_unicode_520_nopad_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_utf16_handler,
&my_collation_generic_uca_nopad_handler
&my_uca_collation_handler_nopad_utf16
};
/*
Copyright (c) 2018 MariaDB Corporation
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef MY_FUNCTION_NAME
#error MY_FUNCTION_NAME is not defined
#endif
#ifndef MY_MB_WC
#error MY_MB_WC is not defined
#endif
#ifndef MY_LIKE_RANGE
#error MY_LIKE_RANGE is not defined
#endif
/*
  Return the next collation weight of the string being scanned.

  SYNOPSIS:
    scanner_next()
    scanner    Scanner state: sbeg/send delimit the bytes not read yet,
               wbeg points into the weight string currently being returned,
               page/code remember the last single character processed.

  RETURN
    -1      - no bytes are left in the string
    0xFFFF  - MY_MB_WC() could not decode a character although some
              bytes were still left (bad or incomplete sequence)
    0xFFFD  - the character code is above scanner->level->maxchar
    otherwise a weight taken from the level's weight pages, from a
    contraction / previous-context entry, or from
    my_uca_scanner_next_implicit().
*/
static inline int
MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
{
  /*
    Check if the weights for the previous character have been
    already fully scanned. If yes, then get the next character and
    point wbeg at its weight string.
  */
  if (scanner->wbeg[0])      /* More weights left from the previous step: */
    return *scanner->wbeg++; /* return the next weight from expansion */
  do
  {
    const uint16 *wpage;
    my_wc_t wc[MY_UCA_MAX_CONTRACTION];
    int mblen;
    /* Get next character */
    if (((mblen= MY_MB_WC(scanner, wc, scanner->sbeg,
                          scanner->send)) <= 0))
    {
      if (scanner->sbeg >= scanner->send)
        return -1; /* No more bytes, end of line reached */
      /*
        There are some more bytes left. Non-positive mblen means that
        we got an incomplete or a bad byte sequence. Consume mbminlen bytes.
      */
      if ((scanner->sbeg+= scanner->cs->mbminlen) > scanner->send)
      {
        /* For safety purposes don't go beyond the string range. */
        scanner->sbeg= scanner->send;
      }
      /*
        Treat every complete or incomplete mbminlen unit as a weight which is
        greater than weight for any possible normal character.
        0xFFFF is greater than any possible weight in the UCA weight table.
      */
      return 0xFFFF;
    }
    scanner->sbeg+= mblen;
    if (wc[0] > scanner->level->maxchar)
    {
      /*
        Return 0xFFFD as weight for all characters above the level's
        maxchar (originally documented as "characters outside BMP").
        Resetting wbeg to nochar also makes the previous-context test
        below treat the following character as having no predecessor.
      */
      scanner->wbeg= nochar;
      return 0xFFFD;
    }
    if (my_uca_have_contractions_quick(scanner->level))
    {
      uint16 *cweight;
      /*
        If we have scanned a character which can have previous context,
        and there were some more characters already before,
        then reconstruct codepoint of the previous character
        from "page" and "code" into wc[1], and verify that {wc[1], wc[0]}
        together form a real previous context pair.
        Note, we support only 2-character long sequences with previous
        context at the moment. CLDR does not have longer sequences.
      */
      if (my_uca_can_be_previous_context_tail(&scanner->level->contractions,
                                              wc[0]) &&
          scanner->wbeg != nochar &&     /* if not the very first character */
          my_uca_can_be_previous_context_head(&scanner->level->contractions,
                                              (wc[1]= ((scanner->page << 8) +
                                                        scanner->code))) &&
          (cweight= my_uca_previous_context_find(scanner, wc[1], wc[0])))
      {
        scanner->page= scanner->code= 0; /* Clear for the next character */
        return *cweight;
      }
      else if (my_uca_can_be_contraction_head(&scanner->level->contractions,
                                              wc[0]))
      {
        /* Check if wc[0] starts a contraction */
        if ((cweight= my_uca_scanner_contraction_find(scanner, wc)))
          return *cweight;
      }
    }
    /* Process single character: split the code into page and offset */
    scanner->page= wc[0] >> 8;
    scanner->code= wc[0] & 0xFF;
    /* If weight page for wc[0] does not exist, then calculate algorithmically */
    if (!(wpage= scanner->level->weights[scanner->page]))
      return my_uca_scanner_next_implicit(scanner);
    /* Calculate pointer to wc[0]'s weight, using page and offset */
    scanner->wbeg= wpage +
                   scanner->code * scanner->level->lengths[scanner->page];
  } while (!scanner->wbeg[0]); /* Skip ignorable characters */
  return *scanner->wbeg++;
}
/*
  Compare two strings on a single weight level of the collation.

  SYNOPSIS:
    strnncoll_onelevel()
    cs           Character set information
    level        Weight level to compare on
    s            First string
    slen         First string length, in bytes
    t            Second string
    tlen         Second string length, in bytes
    t_is_prefix  If TRUE, running out of "t" first counts as equality,
                 i.e. "t" only has to be a prefix of "s"

  NOTES:
    Two weight scanners walk the strings in parallel. The walk stops
    at the first step where the two results differ, or where the
    (equal) results are not positive. scanner_next() in this file
    returns -1 only when a string has no more bytes; badly formed
    bytes produce the ordinary positive weight 0xFFFF. So when the
    walk stops:
      - both results positive: a real difference was found;
      - exactly one result negative: that string ended first;
      - both results negative: both strings ended together.

  RETURN
    0               - strings are equal (or "t" is a prefix of "s" and
                      t_is_prefix is set)
    negative number - the first string is smaller
    positive number - the first string is bigger
*/
static int
MY_FUNCTION_NAME(strnncoll_onelevel)(CHARSET_INFO *cs,
                                     const MY_UCA_WEIGHT_LEVEL *level,
                                     const uchar *s, size_t slen,
                                     const uchar *t, size_t tlen,
                                     my_bool t_is_prefix)
{
  my_uca_scanner s_scan, t_scan;
  int s_weight, t_weight;

  my_uca_scanner_init_any(&s_scan, cs, level, s, slen);
  my_uca_scanner_init_any(&t_scan, cs, level, t, tlen);

  for ( ; ; )
  {
    s_weight= MY_FUNCTION_NAME(scanner_next)(&s_scan);
    t_weight= MY_FUNCTION_NAME(scanner_next)(&t_scan);
    if (s_weight != t_weight || s_weight <= 0)
      break;
  }

  if (t_is_prefix && t_weight < 0)
    return 0;                       /* "t" ran out: prefix match */
  return s_weight - t_weight;
}
/*
  One-level, PAD SPACE.
  Compares on the first weight level of the collation only.
*/
static int
MY_FUNCTION_NAME(strnncoll)(CHARSET_INFO *cs,
                            const uchar *s, size_t slen,
                            const uchar *t, size_t tlen,
                            my_bool t_is_prefix)
{
  const MY_UCA_WEIGHT_LEVEL *first_level= &cs->uca->level[0];

  return MY_FUNCTION_NAME(strnncoll_onelevel)(cs, first_level,
                                              s, slen, t, tlen, t_is_prefix);
}
/*
  Multi-level, PAD SPACE.
  Compares level by level, in the order of cs->uca->level[], over
  cs->levels_for_order levels; the first level that finds
  a difference decides the result.
*/
static int
MY_FUNCTION_NAME(strnncoll_multilevel)(CHARSET_INFO *cs,
                                       const uchar *s, size_t slen,
                                       const uchar *t, size_t tlen,
                                       my_bool t_is_prefix)
{
  const uint level_count= cs->levels_for_order;
  uint lvl;
  int diff= 0;

  for (lvl= 0; lvl < level_count && diff == 0; lvl++)
  {
    diff= MY_FUNCTION_NAME(strnncoll_onelevel)(cs, &cs->uca->level[lvl],
                                               s, slen, t, tlen,
                                               t_is_prefix);
  }
  return diff;
}
/*
  Compare two strings on a single weight level of the collation,
  treating the shorter string as if it were padded with spaces.

  SYNOPSIS:
    strnncollsp_onelevel()
    cs       Character set information
    level    UCA weight level to compare on
    s        First string
    slen     First string length, in bytes
    t        Second string
    tlen     Second string length, in bytes

  NOTES:
    The common part is compared exactly like strnncoll_onelevel() does.
    scanner_next() in this file returns a negative value only when a
    string has no more bytes, so after the parallel walk stops:
      - both results positive: a real difference was found;
      - only "t" ended: the rest of "s" is compared, weight by weight,
        against the weight of the SPACE character;
      - only "s" ended: the rest of "t" is compared against the weight
        of the SPACE character in the same way;
      - both ended: the strings are equal.

  RETURN
    0               - strings are equal
    negative number - the first string is smaller
    positive number - the first string is bigger
*/
static int
MY_FUNCTION_NAME(strnncollsp_onelevel)(CHARSET_INFO *cs,
                                       const MY_UCA_WEIGHT_LEVEL *level,
                                       const uchar *s, size_t slen,
                                       const uchar *t, size_t tlen)
{
  my_uca_scanner s_scan, t_scan;
  int s_weight, t_weight;

  my_uca_scanner_init_any(&s_scan, cs, level, s, slen);
  my_uca_scanner_init_any(&t_scan, cs, level, t, tlen);

  for ( ; ; )
  {
    s_weight= MY_FUNCTION_NAME(scanner_next)(&s_scan);
    t_weight= MY_FUNCTION_NAME(scanner_next)(&t_scan);
    if (s_weight != t_weight || s_weight <= 0)
      break;
  }

  if (s_weight > 0 && t_weight < 0)
  {
    /* "t" ended first: the tail of "s" must consist of spaces only */
    const int space= my_space_weight(level);
    for ( ; s_weight > 0; s_weight= MY_FUNCTION_NAME(scanner_next)(&s_scan))
    {
      if (s_weight != space)
        return s_weight - space;
    }
    return 0;
  }

  if (s_weight < 0 && t_weight > 0)
  {
    /* "s" ended first: the tail of "t" must consist of spaces only */
    const int space= my_space_weight(level);
    for ( ; t_weight > 0; t_weight= MY_FUNCTION_NAME(scanner_next)(&t_scan))
    {
      if (space != t_weight)
        return space - t_weight;
    }
    return 0;
  }

  return s_weight - t_weight;
}
/*
  One-level, PAD SPACE.
  Space-padded comparison on the first weight level of the collation.
*/
static int
MY_FUNCTION_NAME(strnncollsp)(CHARSET_INFO *cs,
                              const uchar *s, size_t slen,
                              const uchar *t, size_t tlen)
{
  const MY_UCA_WEIGHT_LEVEL *first_level= &cs->uca->level[0];

  return MY_FUNCTION_NAME(strnncollsp_onelevel)(cs, first_level,
                                                s, slen, t, tlen);
}
/*
  One-level, NO PAD.
  Without padding there is nothing special about trailing spaces, so
  this is a plain first-level comparison with no prefix matching.
*/
static int
MY_FUNCTION_NAME(strnncollsp_nopad)(CHARSET_INFO *cs,
                                    const uchar *s, size_t slen,
                                    const uchar *t, size_t tlen)
{
  const MY_UCA_WEIGHT_LEVEL *first_level= &cs->uca->level[0];

  return MY_FUNCTION_NAME(strnncoll_onelevel)(cs, first_level,
                                              s, slen, t, tlen, FALSE);
}
/*
  Multi-level, PAD SPACE.
  Runs the space-padded comparison level by level, in the order of
  cs->uca->level[], over cs->levels_for_order levels; the first
  level that finds a difference decides the result.
*/
static int
MY_FUNCTION_NAME(strnncollsp_multilevel)(CHARSET_INFO *cs,
                                         const uchar *s, size_t slen,
                                         const uchar *t, size_t tlen)
{
  const uint level_count= cs->levels_for_order;
  uint lvl;
  int diff= 0;

  for (lvl= 0; lvl < level_count && diff == 0; lvl++)
  {
    diff= MY_FUNCTION_NAME(strnncollsp_onelevel)(cs, &cs->uca->level[lvl],
                                                 s, slen, t, tlen);
  }
  return diff;
}
/*
  Multi-level, NO PAD.
  Runs the plain (unpadded, non-prefix) comparison level by level,
  in the order of cs->uca->level[], over cs->levels_for_order
  levels; the first level that finds a difference decides the result.
*/
static int
MY_FUNCTION_NAME(strnncollsp_nopad_multilevel)(CHARSET_INFO *cs,
                                               const uchar *s, size_t slen,
                                               const uchar *t, size_t tlen)
{
  const uint level_count= cs->levels_for_order;
  uint lvl;
  int diff= 0;

  for (lvl= 0; lvl < level_count && diff == 0; lvl++)
  {
    diff= MY_FUNCTION_NAME(strnncoll_onelevel)(cs, &cs->uca->level[lvl],
                                               s, slen, t, tlen, FALSE);
  }
  return diff;
}
/*
  Calculates hash value for the given string,
  according to the collation, and ignoring trailing spaces.

  SYNOPSIS:
    hash_sort()
    cs     Character set information
    s      String
    slen   String's length, in bytes
    nr1    First hash parameter (read on entry, updated on return)
    nr2    Second hash parameter (read on entry, updated on return)

  NOTES:
    Scans weights one after another and updates
    hash parameters nr1 and nr2. In a case insensitive collation,
    upper and lower case of the same letter will return the same
    weight sequence, and thus will produce the same hash values
    in nr1 and nr2.

    A run of space weights is hashed only once a non-space weight is
    seen after it; a run that reaches the end of the string is dropped.

    This function is used for one-level and for multi-level collations.
    We intentionally use only primary level in multi-level collations.
    This helps to have PARTITION BY KEY put primarily equal records
    into the same partition. E.g. in utf8_thai_520_ci records that differ
    only in tone marks go into the same partition.

  RETURN
    N/A
*/
static void
MY_FUNCTION_NAME(hash_sort)(CHARSET_INFO *cs,
                            const uchar *s, size_t slen,
                            ulong *nr1, ulong *nr2)
{
  int s_res;
  my_uca_scanner scanner;
  int space_weight= my_space_weight(&cs->uca->level[0]);
  register ulong m1= *nr1, m2= *nr2;
  my_uca_scanner_init_any(&scanner, cs, &cs->uca->level[0], s, slen);
  while ((s_res= MY_FUNCTION_NAME(scanner_next)(&scanner)) >0)
  {
    if (s_res == space_weight)
    {
      /* Combine all spaces to be able to skip end spaces */
      uint count= 0;
      do
      {
        count++;
        if ((s_res= MY_FUNCTION_NAME(scanner_next)(&scanner)) <= 0)
        {
          /* Skip spaces at end of string: they do not go into the hash */
          goto end;
        }
      }
      while (s_res == space_weight);
      /* Add back the hash for the space characters that were skipped */
      do
      {
        /*
          We can't use MY_HASH_ADD_16() here because, due to a mistake
          in the original code, the two bytes of the 16-bit value were
          added in the opposite order. Changing this would cause old
          partitioned tables to fail.
        */
        MY_HASH_ADD(m1, m2, space_weight >> 8);
        MY_HASH_ADD(m1, m2, space_weight & 0xFF);
      }
      while (--count != 0);
      /* s_res now holds the first non-space weight after the run */
    }
    /* See comment above why we can't use MY_HASH_ADD_16() */
    MY_HASH_ADD(m1, m2, s_res >> 8);
    MY_HASH_ADD(m1, m2, s_res & 0xFF);
  }
end:
  /* Store the accumulated hash state back for the caller */
  *nr1= m1;
  *nr2= m2;
}
/*
  Calculates hash value for the given string according to the
  collation, without any special treatment of spaces (NO PAD).

  SYNOPSIS:
    hash_sort_nopad()
    cs     Character set information
    s      String
    slen   String's length, in bytes
    nr1    First hash parameter (read on entry, updated on return)
    nr2    Second hash parameter (read on entry, updated on return)

  NOTES:
    Only the first weight level is scanned. Every weight is fed to
    the hash high byte first, then low byte, as two separate
    MY_HASH_ADD() steps. MY_HASH_ADD_16() is not used on purpose:
    the PAD SPACE hash_sort() in this file notes that it adds the
    bytes in the opposite order and that changing the order would
    cause old partitioned tables to fail.
*/
static void
MY_FUNCTION_NAME(hash_sort_nopad)(CHARSET_INFO *cs,
                                  const uchar *s, size_t slen,
                                  ulong *nr1, ulong *nr2)
{
  my_uca_scanner scanner;
  ulong h1= *nr1;
  ulong h2= *nr2;
  int weight;

  my_uca_scanner_init_any(&scanner, cs, &cs->uca->level[0], s, slen);

  for (weight= MY_FUNCTION_NAME(scanner_next)(&scanner);
       weight > 0;
       weight= MY_FUNCTION_NAME(scanner_next)(&scanner))
  {
    MY_HASH_ADD(h1, h2, weight >> 8);
    MY_HASH_ADD(h1, h2, weight & 0xFF);
  }

  *nr1= h1;
  *nr2= h2;
}
/*
  Write the weights of a string on one level into a "binary image",
  suitable to be used in binary comparison, i.e. in memcmp().

  SYNOPSIS:
    strnxfrm_onelevel_internal()
    cs        Character set information
    level     Weight level to take the weights from
    dst       Where to start writing the image
    de        End of the space available for the image
    nweights  In: maximum number of weights to write.
              Out: decreased by one for every weight written.
    src       The source string (may be NULL only if srclen is 0)
    srclen    Length of the source string, in bytes

  NOTES:
    Weights are scanned from the source string and stored as two
    bytes each, high byte first. The loop stops when the image is
    full, when *nweights reaches zero, or when the scanner returns a
    non-positive value. If only one byte of room is left, only the
    high byte of the last weight is stored.

    In a case insensitive collation, upper and lower cases of the
    same letter produce the same image subsequences. It is impossible
    to restore the original string from its binary image.
    Binary images are used for bulk comparison purposes, e.g. in
    ORDER BY, when it is more efficient to create a binary image once
    than to run the weight scanner for every comparison.

  RETURN
    Pointer just past the last byte written into the image.
*/
static uchar *
MY_FUNCTION_NAME(strnxfrm_onelevel_internal)(CHARSET_INFO *cs,
                                             MY_UCA_WEIGHT_LEVEL *level,
                                             uchar *dst, uchar *de,
                                             uint *nweights,
                                             const uchar *src, size_t srclen)
{
  my_uca_scanner scanner;

  DBUG_ASSERT(src || !srclen);
  my_uca_scanner_init_any(&scanner, cs, level, src, srclen);

  while (dst < de && *nweights)
  {
    int weight= MY_FUNCTION_NAME(scanner_next)(&scanner);
    if (weight <= 0)
      break;
    *dst++= weight >> 8;
    if (dst < de)
      *dst++= weight & 0xFF;
    (*nweights)--;
  }
  return dst;
}
/*
  Build the binary image of one level for a PAD SPACE collation.

  Writes up to "nweights" weights of "src" into [dst, de). When
  MY_STRXFRM_PAD_WITH_SPACE is set and both room and unused weights
  remain, the image is padded with the weight of the SPACE character.
  The produced part is then passed to my_strxfrm_desc_and_reverse()
  together with "flags".

  RETURN
    Pointer just past the last byte written.
*/
static uchar *
MY_FUNCTION_NAME(strnxfrm_onelevel)(CHARSET_INFO *cs,
                                    MY_UCA_WEIGHT_LEVEL *level,
                                    uchar *dst, uchar *de, uint nweights,
                                    const uchar *src, size_t srclen, uint flags)
{
  uchar *const image_start= dst;
  uchar *image_end;

  image_end= MY_FUNCTION_NAME(strnxfrm_onelevel_internal)(cs, level,
                                                          dst, de, &nweights,
                                                          src, srclen);
  DBUG_ASSERT(image_end <= de);

  if ((flags & MY_STRXFRM_PAD_WITH_SPACE) && nweights && image_end < de)
  {
    image_end= my_strnxfrm_uca_padn(image_end, de, nweights,
                                    my_space_weight(level));
  }
  DBUG_ASSERT(image_end <= de);

  my_strxfrm_desc_and_reverse(image_start, image_end, flags, 0);
  return image_end;
}
/*
  Build the binary image of one level for a NO PAD collation.

  Same as strnxfrm_onelevel(), except that the padding requested by
  MY_STRXFRM_PAD_WITH_SPACE uses the minimum possible weight on this
  level (min_weight_on_level()) instead of the SPACE weight.

  RETURN
    Pointer just past the last byte written.
*/
static uchar *
MY_FUNCTION_NAME(strnxfrm_nopad_onelevel)(CHARSET_INFO *cs,
                                          MY_UCA_WEIGHT_LEVEL *level,
                                          uchar *dst, uchar *de, uint nweights,
                                          const uchar *src, size_t srclen,
                                          uint flags)
{
  uchar *const image_start= dst;
  uchar *image_end;

  image_end= MY_FUNCTION_NAME(strnxfrm_onelevel_internal)(cs, level,
                                                          dst, de, &nweights,
                                                          src, srclen);
  DBUG_ASSERT(image_end <= de);

  if ((flags & MY_STRXFRM_PAD_WITH_SPACE) && nweights && image_end < de)
  {
    image_end= my_strnxfrm_uca_padn(image_end, de, nweights,
                                    min_weight_on_level(level));
  }
  DBUG_ASSERT(image_end <= de);

  my_strxfrm_desc_and_reverse(image_start, image_end, flags, 0);
  return image_end;
}
/*
  One-level, PAD SPACE: create the binary image of a string.

  SYNOPSIS:
    strnxfrm()
    cs        Character set information
    dst       Where to write the image
    dstlen    Space available for the image, in bytes
    nweights  Maximum number of weights to write
    src       The source string
    srclen    Length of the source string, in bytes
    flags     MY_STRXFRM_xxx flags

  NOTES:
    Only the first weight level is used. With MY_STRXFRM_PAD_TO_MAXLEN
    the rest of the buffer is filled using the SPACE weight.
    (This can probably be changed to memset(dst, 0, de - dst),
    like strnxfrm_multilevel() does.)

  RETURN
    Number of bytes that have been written into the binary image.
*/
static size_t
MY_FUNCTION_NAME(strnxfrm)(CHARSET_INFO *cs,
                           uchar *dst, size_t dstlen, uint nweights,
                           const uchar *src, size_t srclen, uint flags)
{
  uchar *const de= dst + dstlen;
  uchar *image_end;

  image_end= MY_FUNCTION_NAME(strnxfrm_onelevel)(cs, &cs->uca->level[0],
                                                 dst, de, nweights,
                                                 src, srclen, flags);

  if (image_end < de && (flags & MY_STRXFRM_PAD_TO_MAXLEN))
  {
    image_end= my_strnxfrm_uca_pad(image_end, de,
                                   my_space_weight(&cs->uca->level[0]));
  }
  return image_end - dst;
}
/*
  One-level, NO PAD: create the binary image of a string.

  Only the first weight level is used. With MY_STRXFRM_PAD_TO_MAXLEN
  the unused rest of the buffer is filled with zero bytes.

  RETURN
    Number of bytes that have been written into the binary image.
*/
static size_t
MY_FUNCTION_NAME(strnxfrm_nopad)(CHARSET_INFO *cs,
                                 uchar *dst, size_t dstlen,
                                 uint nweights,
                                 const uchar *src, size_t srclen,
                                 uint flags)
{
  uchar *const de= dst + dstlen;
  uchar *image_end;

  image_end= MY_FUNCTION_NAME(strnxfrm_nopad_onelevel)(cs, &cs->uca->level[0],
                                                       dst, de, nweights,
                                                       src, srclen, flags);

  if (image_end < de && (flags & MY_STRXFRM_PAD_TO_MAXLEN))
  {
    memset(image_end, 0, de - image_end);
    image_end= de;
  }
  return image_end - dst;
}
/*
  Multi-level: create the binary image of a string, one level
  after another.

  For each of the cs->levels_for_order levels, the level is written
  if no MY_STRXFRM_LEVEL_ALL bits are set in "flags" at all, or if the
  bit of that particular level is set. Collations with MY_CS_NOPAD in
  cs->state use the NO PAD per-level routine, all others the
  PAD SPACE one. With MY_STRXFRM_PAD_TO_MAXLEN the unused rest of the
  buffer is filled with zero bytes.

  RETURN
    Number of bytes that have been written into the binary image.
*/
static size_t
MY_FUNCTION_NAME(strnxfrm_multilevel)(CHARSET_INFO *cs,
                                      uchar *dst, size_t dstlen,
                                      uint nweights,
                                      const uchar *src, size_t srclen,
                                      uint flags)
{
  const uint level_count= cs->levels_for_order;
  uchar *const image_start= dst;
  uchar *const de= dst + dstlen;
  uint lvl;

  for (lvl= 0; lvl < level_count; lvl++)
  {
    /* Some levels were requested explicitly, and this is not one of them */
    if ((flags & MY_STRXFRM_LEVEL_ALL) &&
        !(flags & (MY_STRXFRM_LEVEL1 << lvl)))
      continue;

    if (cs->state & MY_CS_NOPAD)
    {
      dst= MY_FUNCTION_NAME(strnxfrm_nopad_onelevel)(cs,
                                                     &cs->uca->level[lvl],
                                                     dst, de, nweights,
                                                     src, srclen, flags);
    }
    else
    {
      dst= MY_FUNCTION_NAME(strnxfrm_onelevel)(cs,
                                               &cs->uca->level[lvl],
                                               dst, de, nweights,
                                               src, srclen, flags);
    }
  }

  if ((flags & MY_STRXFRM_PAD_TO_MAXLEN) && dst < de)
  {
    memset(dst, 0, de - dst);
    dst= de;
  }
  return dst - image_start;
}
/*
  One-level, PAD SPACE

  Collation handler table built from the functions instantiated in
  this file for the current MY_FUNCTION_NAME / MY_MB_WC / MY_LIKE_RANGE.
  The initializer is positional: keep the order of the entries in sync
  with the declaration of MY_COLLATION_HANDLER.
*/
MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler)=
{
  my_coll_init_uca,
  MY_FUNCTION_NAME(strnncoll),      /* first-level comparison */
  MY_FUNCTION_NAME(strnncollsp),    /* first-level, space-padded comparison */
  MY_FUNCTION_NAME(strnxfrm),       /* binary image, padded with SPACE weight */
  my_strnxfrmlen_any_uca,
  MY_LIKE_RANGE,                    /* supplied by the including file */
  my_wildcmp_uca,
  NULL, /* strcasecmp() */
  my_instr_mb,
  MY_FUNCTION_NAME(hash_sort),      /* hash ignoring trailing spaces */
  my_propagate_complex
};
/*
  One-level, NO PAD
  For character sets with mbminlen==1 use MY_LIKE_RANGE=my_like_range_mb
  For character sets with mbminlen>=2 use MY_LIKE_RANGE=my_like_range_generic

  The initializer is positional: keep the order of the entries in sync
  with the declaration of MY_COLLATION_HANDLER.
*/
MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_nopad)=
{
  my_coll_init_uca,
  MY_FUNCTION_NAME(strnncoll),         /* first-level comparison */
  MY_FUNCTION_NAME(strnncollsp_nopad), /* same comparison, no space padding */
  MY_FUNCTION_NAME(strnxfrm_nopad),    /* binary image, zero-filled tail */
  my_strnxfrmlen_any_uca,
  MY_LIKE_RANGE, /* my_like_range_mb or my_like_range_generic */
  my_wildcmp_uca,
  NULL, /* strcasecmp() */
  my_instr_mb,
  MY_FUNCTION_NAME(hash_sort_nopad),   /* hash that keeps trailing spaces */
  my_propagate_complex
};
/*
  Multi-level, PAD SPACE

  The initializer is positional: keep the order of the entries in sync
  with the declaration of MY_COLLATION_HANDLER.
*/
MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_multilevel)=
{
  my_coll_init_uca,
  MY_FUNCTION_NAME(strnncoll_multilevel),   /* level-by-level comparison */
  MY_FUNCTION_NAME(strnncollsp_multilevel), /* level-by-level, space-padded */
  MY_FUNCTION_NAME(strnxfrm_multilevel),    /* binary image of all levels */
  my_strnxfrmlen_any_uca_multilevel,
  MY_LIKE_RANGE,                            /* supplied by the including file */
  my_wildcmp_uca,
  NULL, /* strcasecmp() */
  my_instr_mb,
  MY_FUNCTION_NAME(hash_sort),              /* hashes the first level only */
  my_propagate_complex
};
/*
  Multi-level, NO PAD

  The initializer is positional: keep the order of the entries in sync
  with the declaration of MY_COLLATION_HANDLER.

  NOTE(review): unlike the one-level NO PAD handler, this table uses
  the PAD SPACE hash_sort() (which ignores trailing spaces) rather than
  hash_sort_nopad(). Strings that differ only in trailing spaces then
  hash equally although the NO PAD comparison treats them as different;
  that is only an extra collision, not a wrong result. Confirm it is
  intentional before changing it: hash values may be persistent
  (see the partitioning remark in hash_sort()).
*/
MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_nopad_multilevel)=
{
  my_coll_init_uca,
  MY_FUNCTION_NAME(strnncoll_multilevel),         /* level-by-level comparison */
  MY_FUNCTION_NAME(strnncollsp_nopad_multilevel), /* same, no space padding */
  MY_FUNCTION_NAME(strnxfrm_multilevel),          /* binary image of all levels */
  my_strnxfrmlen_any_uca_multilevel,
  MY_LIKE_RANGE,                                  /* supplied by the including file */
  my_wildcmp_uca,
  NULL, /* strcasecmp() */
  my_instr_mb,
  MY_FUNCTION_NAME(hash_sort),                    /* see NOTE(review) above */
  my_propagate_complex
};
#undef MY_FUNCTION_NAME
#undef MY_MB_WC
#undef MY_LIKE_RANGE
......@@ -1184,35 +1184,7 @@ my_lengthsp_mb2(CHARSET_INFO *cs __attribute__((unused)),
but the JSON functions needed my_utf16_uni()
so the #ifdef was moved lower.
*/
/*
D800..DB7F - Non-provate surrogate high (896 pages)
DB80..DBFF - Private surrogate high (128 pages)
DC00..DFFF - Surrogate low (1024 codes in a page)
*/
#define MY_UTF16_SURROGATE_HIGH_FIRST 0xD800
#define MY_UTF16_SURROGATE_HIGH_LAST 0xDBFF
#define MY_UTF16_SURROGATE_LOW_FIRST 0xDC00
#define MY_UTF16_SURROGATE_LOW_LAST 0xDFFF
#define MY_UTF16_HIGH_HEAD(x) ((((uchar) (x)) & 0xFC) == 0xD8)
#define MY_UTF16_LOW_HEAD(x) ((((uchar) (x)) & 0xFC) == 0xDC)
/* Test if a byte is a leading byte of a high or low surrogate head: */
#define MY_UTF16_SURROGATE_HEAD(x) ((((uchar) (x)) & 0xF8) == 0xD8)
/* Test if a Unicode code point is a high or low surrogate head */
#define MY_UTF16_SURROGATE(x) (((x) & 0xF800) == 0xD800)
#define MY_UTF16_WC2(a, b) ((a << 8) + b)
/*
a= 110110?? (<< 18)
b= ???????? (<< 10)
c= 110111?? (<< 8)
d= ???????? (<< 0)
*/
#define MY_UTF16_WC4(a, b, c, d) (((a & 3) << 18) + (b << 10) + \
((c & 3) << 8) + d + 0x10000)
#include "ctype-utf16.h"
#define IS_MB2_CHAR(b0,b1) (!MY_UTF16_SURROGATE_HEAD(b0))
#define IS_MB4_CHAR(b0,b1,b2,b3) (MY_UTF16_HIGH_HEAD(b0) && MY_UTF16_LOW_HEAD(b2))
......@@ -1261,32 +1233,7 @@ static inline int my_weight_mb2_utf16mb2_general_ci(uchar b0, uchar b1)
my_utf16_uni(CHARSET_INFO *cs __attribute__((unused)),
my_wc_t *pwc, const uchar *s, const uchar *e)
{
if (s + 2 > e)
return MY_CS_TOOSMALL2;
/*
High bytes: 0xD[89AB] = B'110110??'
Low bytes: 0xD[CDEF] = B'110111??'
Surrogate mask: 0xFC = B'11111100'
*/
if (MY_UTF16_HIGH_HEAD(*s)) /* Surrogate head */
{
if (s + 4 > e)
return MY_CS_TOOSMALL4;
if (!MY_UTF16_LOW_HEAD(s[2])) /* Broken surrigate pair */
return MY_CS_ILSEQ;
*pwc= MY_UTF16_WC4(s[0], s[1], s[2], s[3]);
return 4;
}
if (MY_UTF16_LOW_HEAD(*s)) /* Low surrogate part without high part */
return MY_CS_ILSEQ;
*pwc= MY_UTF16_WC2(s[0], s[1]);
return 2;
return my_mb_wc_utf16_quick(pwc, s, e);
}
......@@ -2109,6 +2056,8 @@ struct charset_info_st my_charset_utf16le_nopad_bin=
#ifdef HAVE_CHARSET_utf32
#include "ctype-utf32.h"
/*
Check is b0 and b1 start a valid UTF32 four-byte sequence.
Don't accept characters greater than U+10FFFF.
......@@ -2117,8 +2066,6 @@ struct charset_info_st my_charset_utf16le_nopad_bin=
#define IS_MB4_CHAR(b0,b1,b2,b3) (IS_UTF32_MBHEAD4(b0,b1))
#define MY_UTF32_WC4(b0,b1,b2,b3) ((((my_wc_t)b0) << 24) + (b1 << 16) + \
(b2 << 8) + (b3))
static inline int my_weight_utf32_general_ci(uchar b0, uchar b1,
uchar b2, uchar b3)
......@@ -2161,10 +2108,7 @@ static int
my_utf32_uni(CHARSET_INFO *cs __attribute__((unused)),
my_wc_t *pwc, const uchar *s, const uchar *e)
{
if (s + 4 > e)
return MY_CS_TOOSMALL4;
*pwc= MY_UTF32_WC4(s[0], s[1], s[2], s[3]);
return *pwc > 0x10FFFF ? MY_CS_ILSEQ : 4;
return my_mb_wc_utf32_quick(pwc, s, e);
}
......@@ -2928,6 +2872,8 @@ struct charset_info_st my_charset_utf32_nopad_bin=
#ifdef HAVE_CHARSET_ucs2
#include "ctype-ucs2.h"
static const uchar ctype_ucs2[] = {
0,
32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
......@@ -3037,11 +2983,7 @@ my_charlen_ucs2(CHARSET_INFO *cs __attribute__((unused)),
static int my_ucs2_uni(CHARSET_INFO *cs __attribute__((unused)),
my_wc_t * pwc, const uchar *s, const uchar *e)
{
if (s+2 > e) /* Need 2 characters */
return MY_CS_TOOSMALL2;
*pwc= ((uchar)s[0]) * 256 + ((uchar)s[1]);
return 2;
return my_mb_wc_ucs2_quick(pwc, s, e);
}
static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
......
/*
Copyright (c) 2018 MariaDB Corporation
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef _CTYPE_UCS2_H
#define _CTYPE_UCS2_H
/*
  Decode one UCS-2 character: two bytes, most significant byte first.

  SYNOPSIS:
    my_mb_wc_ucs2_quick()
    pwc   Where to store the decoded code point
    s     Start of the input
    e     End of the input (one past the last available byte)

  RETURN
    2                - a character was decoded into *pwc
    MY_CS_TOOSMALL2  - fewer than 2 bytes are available; *pwc is not touched
*/
static inline int
my_mb_wc_ucs2_quick(my_wc_t * pwc, const uchar *s, const uchar *e)
{
  /*
    Compare the remaining length instead of computing "s+2": forming
    a pointer beyond one-past-the-end of the buffer is undefined
    behaviour, even if it is never dereferenced.
  */
  if (e - s < 2) /* Need 2 bytes */
    return MY_CS_TOOSMALL2;
  *pwc= ((uchar)s[0]) * 256 + ((uchar)s[1]);
  return 2;
}
#endif /* _CTYPE_UCS2_H */
/*
Copyright (c) 2018 MariaDB Corporation
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef _CTYPE_UTF16_H
#define _CTYPE_UTF16_H
/*
D800..DB7F - Non-private surrogate high (896 pages)
DB80..DBFF - Private surrogate high (128 pages)
DC00..DFFF - Surrogate low (1024 codes in a page)
*/
#define MY_UTF16_SURROGATE_HIGH_FIRST 0xD800
#define MY_UTF16_SURROGATE_HIGH_LAST 0xDBFF
#define MY_UTF16_SURROGATE_LOW_FIRST 0xDC00
#define MY_UTF16_SURROGATE_LOW_LAST 0xDFFF
#define MY_UTF16_HIGH_HEAD(x) ((((uchar) (x)) & 0xFC) == 0xD8)
#define MY_UTF16_LOW_HEAD(x) ((((uchar) (x)) & 0xFC) == 0xDC)
/* Test if a byte is a leading byte of a high or low surrogate head: */
#define MY_UTF16_SURROGATE_HEAD(x) ((((uchar) (x)) & 0xF8) == 0xD8)
/* Test if a Unicode code point is a high or low surrogate head */
#define MY_UTF16_SURROGATE(x) (((x) & 0xF800) == 0xD800)
/*
  Combine the two bytes of a non-surrogate UTF-16 unit (high byte
  first) into a code point. Arguments are parenthesized so that
  expressions can be passed safely.
*/
#define MY_UTF16_WC2(a, b) (((a) << 8) + (b))
/*
  Combine the four bytes of a surrogate pair into a code point:
  a= 110110?? (<< 18)
  b= ???????? (<< 10)
  c= 110111?? (<< 8)
  d= ???????? (<< 0)
  Only the two low bits of "a" and "c" carry data; 0x10000 is added
  because surrogate pairs start right after the 16-bit range.
*/
#define MY_UTF16_WC4(a, b, c, d) ((((a) & 3) << 18) + ((b) << 10) + \
                                  (((c) & 3) << 8) + (d) + 0x10000)
/*
  Decode one UTF-16 character (bytes in most-significant-first order).

  SYNOPSIS:
    my_mb_wc_utf16_quick()
    pwc   Where to store the decoded code point
    s     Start of the input
    e     End of the input (one past the last available byte)

  RETURN
    2                - a non-surrogate character was decoded into *pwc
    4                - a surrogate pair was decoded into *pwc
    MY_CS_TOOSMALL2  - fewer than 2 bytes are available
    MY_CS_TOOSMALL4  - a high surrogate is followed by fewer than 2 bytes
    MY_CS_ILSEQ      - a high surrogate is not followed by a low one,
                       or a low surrogate appears without a high one
    On any error *pwc is not touched.
*/
static inline int
my_mb_wc_utf16_quick(my_wc_t *pwc, const uchar *s, const uchar *e)
{
  /*
    The length checks compare the remaining byte count instead of
    computing "s + N": forming a pointer beyond one-past-the-end of
    the buffer is undefined behaviour, even if it is never dereferenced.
  */
  if (e - s < 2)
    return MY_CS_TOOSMALL2;
  /*
    High bytes: 0xD[89AB] = B'110110??'
    Low bytes:  0xD[CDEF] = B'110111??'
    Surrogate mask:  0xFC = B'11111100'
  */
  if (MY_UTF16_HIGH_HEAD(*s)) /* Surrogate head */
  {
    if (e - s < 4)
      return MY_CS_TOOSMALL4;
    if (!MY_UTF16_LOW_HEAD(s[2]))  /* Broken surrogate pair */
      return MY_CS_ILSEQ;
    *pwc= MY_UTF16_WC4(s[0], s[1], s[2], s[3]);
    return 4;
  }
  if (MY_UTF16_LOW_HEAD(*s)) /* Low surrogate part without high part */
    return MY_CS_ILSEQ;
  *pwc= MY_UTF16_WC2(s[0], s[1]);
  return 2;
}
#endif /* _CTYPE_UTF16_H */
/*
Copyright (c) 2018 MariaDB Corporation
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef _CTYPE_UTF32_H
#define _CTYPE_UTF32_H
/*
  Combine four bytes (most significant byte first) into one code point.
  b0 is widened to my_wc_t before the 24-bit shift. Every parameter is
  parenthesized so that expression arguments (e.g. "x | y") are
  evaluated as a unit before shifting/adding.
*/
#define MY_UTF32_WC4(b0,b1,b2,b3) ((((my_wc_t) (b0)) << 24) + ((b1) << 16) + \
                                   ((b2) << 8) + (b3))
/*
  Decode one UTF-32 character from the byte range [s, e).
  Bytes are read most significant first.

  Returns MY_CS_TOOSMALL4 if fewer than 4 bytes are available.
  Otherwise the decoded value is always stored in *pwc; the result is
  4 (bytes consumed) when the value is at most 0x10FFFF, and
  MY_CS_ILSEQ when it is greater.
*/
static inline int
my_mb_wc_utf32_quick(my_wc_t *pwc, const uchar *s, const uchar *e)
{
  if (s + 4 > e)
    return MY_CS_TOOSMALL4;

  *pwc= MY_UTF32_WC4(s[0], s[1], s[2], s[3]);

  if (*pwc > 0x10FFFF)
    return MY_CS_ILSEQ;               /* Above the Unicode maximum */

  return 4;
}
#endif /* _CTYPE_UTF32_H */
......@@ -26,78 +26,9 @@
#define EILSEQ ENOENT
#endif
/*
Detect special bytes and sequences.
NOTE(review): the same macros also appear under the _CTYPE_UTF8_H include
guard; C only allows a macro to be redefined with an identical replacement
list, so the two copies must stay token-identical - confirm before editing.
*/
/*
True when c is a continuation byte 0x80..0xBF (B'10xxxxxx'):
XOR with 0x80 maps exactly that range onto 0x00..0x3F.
*/
#define IS_CONTINUATION_BYTE(c) (((uchar) (c) ^ 0x80) < 0x40)
/*
Check MB2 character assuming that b0 is already known to be >= 0xC2.
Use this macro if the caller already checked b0 for:
- an MB1 character
- an unused gap between MB1 and MB2HEAD
*/
#define IS_UTF8MB2_STEP2(b0,b1) (((uchar) (b0) < 0xE0) && \
IS_CONTINUATION_BYTE((uchar) b1))
/*
Check MB3 character assuming that b0 is already known to be
in the valid MB3HEAD range [0xE0..0xEF].
The last condition rejects [E0][80..9F], which would decode to a
code point below U+0800.
*/
#define IS_UTF8MB3_STEP2(b0,b1,b2) (IS_CONTINUATION_BYTE(b1) && \
IS_CONTINUATION_BYTE(b2) && \
((uchar) b0 >= 0xe1 || (uchar) b1 >= 0xa0))
/*
Check MB3 character assuming that b0 is already known to be >= 0xE0,
but is not checked for the high end 0xF0 yet.
Use this macro if the caller already checked b0 for:
- an MB1 character
- an unused gap between MB1 and MB2HEAD
- an MB2HEAD
*/
#define IS_UTF8MB3_STEP3(b0,b1,b2) (((uchar) (b0) < 0xF0) && \
IS_UTF8MB3_STEP2(b0,b1,b2))
/*
UTF-8 quick four-byte mask:
11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
Encoding allows to encode U+00010000..U+001FFFFF
The maximum character defined in the Unicode standard is U+0010FFFF.
Higher characters U+00110000..U+001FFFFF are not used.
11110000.10010000.10xxxxxx.10xxxxxx == F0.90.80.80 == U+00010000 (min)
11110100.10001111.10111111.10111111 == F4.8F.BF.BF == U+0010FFFF (max)
Valid codes:
[F0][90..BF][80..BF][80..BF]
[F1][80..BF][80..BF][80..BF]
[F2][80..BF][80..BF][80..BF]
[F3][80..BF][80..BF][80..BF]
[F4][80..8F][80..BF][80..BF]
*/
#include "ctype-utf8.h"
/*
Check MB4 character assuming that b0 is already
known to be in the range [0xF0..0xF4].
The last two conditions reject [F0][80..8F] and [F4][90..BF],
matching the table of valid codes above.
NOTE(review): unlike the MB2/MB3 macros, b0 and b1 are compared here
without a (uchar) cast, so arguments must already be unsigned bytes.
*/
#define IS_UTF8MB4_STEP2(b0,b1,b2,b3) (IS_CONTINUATION_BYTE(b1) && \
IS_CONTINUATION_BYTE(b2) && \
IS_CONTINUATION_BYTE(b3) && \
(b0 >= 0xf1 || b1 >= 0x90) && \
(b0 <= 0xf3 || b1 <= 0x8F))
/*
Check MB4 character when b0 has not been checked
against the high end 0xF5 yet.
*/
#define IS_UTF8MB4_STEP3(b0,b1,b2,b3) (((uchar) (b0) < 0xF5) && \
IS_UTF8MB4_STEP2(b0,b1,b2,b3))
/*
Convert individual bytes to Unicode code points.
The head byte contributes its low 5 (MB2), 4 (MB3) or 3 (MB4) bits;
each following byte contributes "byte ^ 0x80", i.e. its low 6 bits
when it is a valid continuation byte. No validation is done here.
*/
#define UTF8MB2_CODE(b0,b1) (((my_wc_t) ((uchar) b0 & 0x1f) << 6) |\
((my_wc_t) ((uchar) b1 ^ 0x80)))
#define UTF8MB3_CODE(b0,b1,b2) (((my_wc_t) ((uchar) b0 & 0x0f) << 12) |\
((my_wc_t) ((uchar) b1 ^ 0x80) << 6) |\
((my_wc_t) ((uchar) b2 ^ 0x80)))
#define UTF8MB4_CODE(b0,b1,b2,b3) (((my_wc_t) ((uchar) b0 & 0x07) << 18) |\
((my_wc_t) ((uchar) b1 ^ 0x80) << 12) |\
((my_wc_t) ((uchar) b2 ^ 0x80) << 6) |\
(my_wc_t) ((uchar) b3 ^ 0x80))
/* Definitions for strcoll.ic */
/* True when x is a single-byte character (below 0x80) */
#define IS_MB1_CHAR(x) ((uchar) (x) < 0x80)
......@@ -4981,42 +4912,7 @@ static const uchar to_upper_utf8[] = {
/*
  Decode one utf8 character (1 to 3 bytes) from the byte range [s, e).

  This is the CHARSET_INFO-style wrapper: "cs" is unused and the work is
  done by my_mb_wc_utf8mb3_quick(), so the decoder exists only once.

  On success stores the code point in *pwc and returns the number of
  bytes consumed (1..3). Returns MY_CS_TOOSMALL, MY_CS_TOOSMALL2 or
  MY_CS_TOOSMALL3 when the range is too short for the sequence, and
  MY_CS_ILSEQ for an invalid sequence or a head byte >= 0xF0.
*/
static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)),
                       my_wc_t * pwc, const uchar *s, const uchar *e)
{
  return my_mb_wc_utf8mb3_quick(pwc, s, e);
}
......@@ -7379,52 +7275,7 @@ static int
my_mb_wc_utf8mb4(CHARSET_INFO *cs __attribute__((unused)),
                 my_wc_t * pwc, const uchar *s, const uchar *e)
{
  /*
    Decode one utf8mb4 character (1 to 4 bytes) from the byte range [s, e).

    "cs" is unused; the work is done by my_mb_wc_utf8mb4_quick(), so the
    decoder exists only once.
    NOTE(review): that helper is defined under HAVE_CHARSET_utf8mb4 -
    presumably this function is compiled under the same guard; confirm.

    On success stores the code point in *pwc and returns the number of
    bytes consumed (1..4). Returns MY_CS_TOOSMALL, MY_CS_TOOSMALL2,
    MY_CS_TOOSMALL3 or MY_CS_TOOSMALL4 when the range is too short for
    the sequence, and MY_CS_ILSEQ for an invalid sequence or a head
    byte >= 0xF5.
  */
  return my_mb_wc_utf8mb4_quick(pwc, s, e);
}
......
/*
Copyright (c) 2018 MariaDB Corporation
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef _CTYPE_UTF8_H
#define _CTYPE_UTF8_H
/* Detect special bytes and sequences */
/*
True when c is a continuation byte 0x80..0xBF (B'10xxxxxx'):
XOR with 0x80 maps exactly that range onto 0x00..0x3F.
*/
#define IS_CONTINUATION_BYTE(c) (((uchar) (c) ^ 0x80) < 0x40)
/*
Check MB2 character assuming that b0 is already known to be >= 0xC2.
Use this macro if the caller already checked b0 for:
- an MB1 character
- an unused gap between MB1 and MB2HEAD
*/
#define IS_UTF8MB2_STEP2(b0,b1) (((uchar) (b0) < 0xE0) && \
IS_CONTINUATION_BYTE((uchar) b1))
/*
Check MB3 character assuming that b0 is already known to be
in the valid MB3HEAD range [0xE0..0xEF].
The last condition rejects [E0][80..9F], which would decode to a
code point below U+0800.
*/
#define IS_UTF8MB3_STEP2(b0,b1,b2) (IS_CONTINUATION_BYTE(b1) && \
IS_CONTINUATION_BYTE(b2) && \
((uchar) b0 >= 0xe1 || (uchar) b1 >= 0xa0))
/*
Check MB3 character assuming that b0 is already known to be >= 0xE0,
but is not checked for the high end 0xF0 yet.
Use this macro if the caller already checked b0 for:
- an MB1 character
- an unused gap between MB1 and MB2HEAD
- an MB2HEAD
*/
#define IS_UTF8MB3_STEP3(b0,b1,b2) (((uchar) (b0) < 0xF0) && \
IS_UTF8MB3_STEP2(b0,b1,b2))
/*
UTF-8 quick four-byte mask:
11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
Encoding allows to encode U+00010000..U+001FFFFF
The maximum character defined in the Unicode standard is U+0010FFFF.
Higher characters U+00110000..U+001FFFFF are not used.
11110000.10010000.10xxxxxx.10xxxxxx == F0.90.80.80 == U+00010000 (min)
11110100.10001111.10111111.10111111 == F4.8F.BF.BF == U+0010FFFF (max)
Valid codes:
[F0][90..BF][80..BF][80..BF]
[F1][80..BF][80..BF][80..BF]
[F2][80..BF][80..BF][80..BF]
[F3][80..BF][80..BF][80..BF]
[F4][80..8F][80..BF][80..BF]
*/
/*
Check MB4 character assuming that b0 is already
known to be in the range [0xF0..0xF4].
The last two conditions reject [F0][80..8F] and [F4][90..BF],
matching the table of valid codes above.
NOTE(review): unlike the MB2/MB3 macros, b0 and b1 are compared here
without a (uchar) cast, so arguments must already be unsigned bytes.
*/
#define IS_UTF8MB4_STEP2(b0,b1,b2,b3) (IS_CONTINUATION_BYTE(b1) && \
IS_CONTINUATION_BYTE(b2) && \
IS_CONTINUATION_BYTE(b3) && \
(b0 >= 0xf1 || b1 >= 0x90) && \
(b0 <= 0xf3 || b1 <= 0x8F))
/*
Check MB4 character when b0 has not been checked
against the high end 0xF5 yet.
*/
#define IS_UTF8MB4_STEP3(b0,b1,b2,b3) (((uchar) (b0) < 0xF5) && \
IS_UTF8MB4_STEP2(b0,b1,b2,b3))
/*
Convert individual bytes to Unicode code points.
The head byte contributes its low 5 (MB2), 4 (MB3) or 3 (MB4) bits;
each following byte contributes "byte ^ 0x80", i.e. its low 6 bits
when it is a valid continuation byte. No validation is done here.
*/
#define UTF8MB2_CODE(b0,b1) (((my_wc_t) ((uchar) b0 & 0x1f) << 6) |\
((my_wc_t) ((uchar) b1 ^ 0x80)))
#define UTF8MB3_CODE(b0,b1,b2) (((my_wc_t) ((uchar) b0 & 0x0f) << 12) |\
((my_wc_t) ((uchar) b1 ^ 0x80) << 6) |\
((my_wc_t) ((uchar) b2 ^ 0x80)))
#define UTF8MB4_CODE(b0,b1,b2,b3) (((my_wc_t) ((uchar) b0 & 0x07) << 18) |\
((my_wc_t) ((uchar) b1 ^ 0x80) << 12) |\
((my_wc_t) ((uchar) b2 ^ 0x80) << 6) |\
(my_wc_t) ((uchar) b3 ^ 0x80))
/*
  Decode one utf8 character of at most 3 bytes from the range [s, e).

  On success stores the code point in *pwc and returns the number of
  bytes consumed (1..3). Returns MY_CS_TOOSMALL for an empty range,
  MY_CS_TOOSMALL2 / MY_CS_TOOSMALL3 when a 2- or 3-byte sequence is
  truncated, and MY_CS_ILSEQ for an invalid sequence, including any
  head byte below 0xC2 that is not a single-byte character and any
  head byte >= 0xF0.
*/
static inline int
my_mb_wc_utf8mb3_quick(my_wc_t * pwc, const uchar *s, const uchar *e)
{
  uchar lead;

  if (s >= e)
    return MY_CS_TOOSMALL;

  lead= s[0];

  /* One byte: 0x00..0x7F */
  if (lead < 0x80)
  {
    *pwc= lead;
    return 1;
  }

  /* Bytes that can never start a sequence handled by this decoder */
  if (lead < 0xc2 || lead >= 0xf0)
    return MY_CS_ILSEQ;

  /* Two bytes: head 0xC2..0xDF */
  if (lead < 0xe0)
  {
    if (s + 2 > e)
      return MY_CS_TOOSMALL2;
    if (IS_CONTINUATION_BYTE(s[1]))
    {
      *pwc= UTF8MB2_CODE(lead, s[1]);
      return 2;
    }
    return MY_CS_ILSEQ;
  }

  /* Three bytes: head 0xE0..0xEF */
  if (s + 3 > e)
    return MY_CS_TOOSMALL3;
  if (IS_UTF8MB3_STEP2(lead, s[1], s[2]))
  {
    *pwc= UTF8MB3_CODE(lead, s[1], s[2]);
    return 3;
  }
  return MY_CS_ILSEQ;
}
#ifdef HAVE_CHARSET_utf8mb4
/*
  Decode one utf8mb4 character of at most 4 bytes from the range [s, e).

  On success stores the code point in *pwc and returns the number of
  bytes consumed (1..4). Returns MY_CS_TOOSMALL for an empty range,
  MY_CS_TOOSMALL2 / MY_CS_TOOSMALL3 / MY_CS_TOOSMALL4 when a 2-, 3- or
  4-byte sequence is truncated, and MY_CS_ILSEQ for an invalid sequence,
  including any head byte below 0xC2 that is not a single-byte character
  and any head byte >= 0xF5.
*/
static inline int
my_mb_wc_utf8mb4_quick(my_wc_t *pwc, const uchar *s, const uchar *e)
{
  uchar lead;

  if (s >= e)
    return MY_CS_TOOSMALL;

  lead= s[0];

  /* One byte: 0x00..0x7F */
  if (lead < 0x80)
  {
    *pwc= lead;
    return 1;
  }

  /* Bytes that can never start a valid sequence */
  if (lead < 0xc2 || lead >= 0xf5)
    return MY_CS_ILSEQ;

  /* Two bytes: head 0xC2..0xDF */
  if (lead < 0xe0)
  {
    if (s + 2 > e)
      return MY_CS_TOOSMALL2;
    if (IS_CONTINUATION_BYTE(s[1]))
    {
      *pwc= UTF8MB2_CODE(lead, s[1]);
      return 2;
    }
    return MY_CS_ILSEQ;
  }

  /* Three bytes: head 0xE0..0xEF */
  if (lead < 0xf0)
  {
    if (s + 3 > e)
      return MY_CS_TOOSMALL3;
    if (IS_UTF8MB3_STEP2(lead, s[1], s[2]))
    {
      *pwc= UTF8MB3_CODE(lead, s[1], s[2]);
      return 3;
    }
    return MY_CS_ILSEQ;
  }

  /* Four bytes: head 0xF0..0xF4 */
  if (s + 4 > e)
    return MY_CS_TOOSMALL4;
  if (IS_UTF8MB4_STEP2(lead, s[1], s[2], s[3]))
  {
    *pwc= UTF8MB4_CODE(lead, s[1], s[2], s[3]);
    return 4;
  }
  return MY_CS_ILSEQ;
}
#endif /* HAVE_CHARSET_utf8mb4*/
#endif /* _CTYPE_UTF8_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment