Commit 10bb09e4 authored by unknown

CHARSET_INFO::instr was extended to return more substring match results:

- offset of the substring beginning
- offset of the substring end
- number of characters (multi-byte compatible)

parent e81d45c1
...@@ -75,6 +75,12 @@ typedef struct my_uni_idx_st ...@@ -75,6 +75,12 @@ typedef struct my_uni_idx_st
uchar *tab; uchar *tab;
} MY_UNI_IDX; } MY_UNI_IDX;
/*
  Describes one segment reported by the instr() collation handlers
  through their "match" output array.

  In the handlers visible in this change, match[0] covers the part of
  "big" that precedes the found substring, and match[1] (filled only
  when nmatch > 1) covers the found substring itself.
*/
typedef struct
{
/* Offset of the segment beginning (a byte offset in the visible handlers) */
uint beg;
/* Offset of the segment end (a byte offset in the visible handlers) */
uint end;
/*
  Number of characters in the segment (multi-byte aware).
  NOTE(review): the multi-byte handler sets this to 0 for match[1]
  ("Not computed"), so do not rely on it there.
*/
uint mblen;
} my_match_t;
enum my_lex_states enum my_lex_states
{ {
...@@ -116,9 +122,10 @@ typedef struct my_collation_handler_st ...@@ -116,9 +122,10 @@ typedef struct my_collation_handler_st
int (*strcasecmp)(struct charset_info_st *, const char *, const char *); int (*strcasecmp)(struct charset_info_st *, const char *, const char *);
int (*instr)(struct charset_info_st *, uint (*instr)(struct charset_info_st *,
const char *big, uint b_length, const char *big, uint b_length,
const char *small, uint s_length); const char *small, uint s_length,
my_match_t *match, uint nmatch);
/* Hash calculation */ /* Hash calculation */
void (*hash_sort)(struct charset_info_st *cs, const uchar *key, uint len, void (*hash_sort)(struct charset_info_st *cs, const uchar *key, uint len,
...@@ -249,9 +256,10 @@ extern void my_hash_sort_simple(CHARSET_INFO *cs, ...@@ -249,9 +256,10 @@ extern void my_hash_sort_simple(CHARSET_INFO *cs,
extern uint my_lengthsp_8bit(CHARSET_INFO *cs, const char *ptr, uint length); extern uint my_lengthsp_8bit(CHARSET_INFO *cs, const char *ptr, uint length);
extern int my_instr_simple(struct charset_info_st *, extern uint my_instr_simple(struct charset_info_st *,
const char *big, uint b_length, const char *big, uint b_length,
const char *small, uint s_length); const char *small, uint s_length,
my_match_t *match, uint nmatch);
/* Functions for 8bit */ /* Functions for 8bit */
...@@ -317,9 +325,10 @@ int my_wildcmp_mb(CHARSET_INFO *, ...@@ -317,9 +325,10 @@ int my_wildcmp_mb(CHARSET_INFO *,
int escape, int w_one, int w_many); int escape, int w_one, int w_many);
uint my_numchars_mb(CHARSET_INFO *, const char *b, const char *e); uint my_numchars_mb(CHARSET_INFO *, const char *b, const char *e);
uint my_charpos_mb(CHARSET_INFO *, const char *b, const char *e, uint pos); uint my_charpos_mb(CHARSET_INFO *, const char *b, const char *e, uint pos);
int my_instr_mb(struct charset_info_st *, uint my_instr_mb(struct charset_info_st *,
const char *big, uint b_length, const char *big, uint b_length,
const char *small, uint s_length); const char *small, uint s_length,
my_match_t *match, uint nmatch);
extern my_bool my_parse_charset_xml(const char *bug, uint len, extern my_bool my_parse_charset_xml(const char *bug, uint len,
......
...@@ -1161,7 +1161,7 @@ longlong Item_func_locate::val_int() ...@@ -1161,7 +1161,7 @@ longlong Item_func_locate::val_int()
null_value=0; null_value=0;
uint start=0; uint start=0;
uint start0=0; uint start0=0;
int ind; my_match_t match;
if (arg_count == 3) if (arg_count == 3)
{ {
...@@ -1175,11 +1175,12 @@ longlong Item_func_locate::val_int() ...@@ -1175,11 +1175,12 @@ longlong Item_func_locate::val_int()
if (!b->length()) // Found empty string at start if (!b->length()) // Found empty string at start
return (longlong) (start+1); return (longlong) (start+1);
ind= cmp_collation.collation->coll->instr(cmp_collation.collation, if (!cmp_collation.collation->coll->instr(cmp_collation.collation,
a->ptr()+start, a->length()-start, a->ptr()+start, a->length()-start,
b->ptr(), b->length()); b->ptr(), b->length(),
&match, 1))
return (longlong) (ind >= 0 ? ind + start0 + 1 : ind + 1); return 0;
return (longlong) match.mblen + start0 + 1;
} }
......
...@@ -263,16 +263,25 @@ static int my_strnxfrm_bin(CHARSET_INFO *cs __attribute__((unused)), ...@@ -263,16 +263,25 @@ static int my_strnxfrm_bin(CHARSET_INFO *cs __attribute__((unused)),
} }
static static
int my_instr_bin(CHARSET_INFO *cs __attribute__((unused)), uint my_instr_bin(CHARSET_INFO *cs __attribute__((unused)),
const char *big, uint b_length, const char *big, uint b_length,
const char *small, uint s_length) const char *small, uint s_length,
my_match_t *match, uint nmatch)
{ {
register const uchar *str, *search, *end, *search_end; register const uchar *str, *search, *end, *search_end;
if (s_length <= b_length) if (s_length <= b_length)
{ {
if (!s_length) if (!s_length)
return 0; /* Empty string is always found */ {
if (nmatch)
{
match->beg= 0;
match->end= 0;
match->mblen= 0;
}
return 1; /* Empty string is always found */
}
str= (const uchar*) big; str= (const uchar*) big;
search= (const uchar*) small; search= (const uchar*) small;
...@@ -293,11 +302,24 @@ int my_instr_bin(CHARSET_INFO *cs __attribute__((unused)), ...@@ -293,11 +302,24 @@ int my_instr_bin(CHARSET_INFO *cs __attribute__((unused)),
if ((*i++) != (*j++)) if ((*i++) != (*j++))
goto skipp; goto skipp;
return (int) (str- (const uchar*)big) -1; if (nmatch > 0)
{
match[0].beg= 0;
match[0].end= str- (const uchar*)big-1;
match[0].mblen= match[0].end;
if (nmatch > 1)
{
match[1].beg= match[0].end;
match[1].end= match[0].end+s_length;
match[1].mblen= match[1].end-match[1].beg;
}
}
return 2;
} }
} }
} }
return -1; return 0;
} }
......
...@@ -274,18 +274,28 @@ uint my_charpos_mb(CHARSET_INFO *cs __attribute__((unused)), ...@@ -274,18 +274,28 @@ uint my_charpos_mb(CHARSET_INFO *cs __attribute__((unused)),
return b-b0; return b-b0;
} }
int my_instr_mb(CHARSET_INFO *cs, uint my_instr_mb(CHARSET_INFO *cs,
const char *big, uint b_length, const char *big, uint b_length,
const char *small, uint s_length) const char *small, uint s_length,
my_match_t *match, uint nmatch)
{ {
register const char *end; register const char *end, *big0;
int res= 0; int res= 0;
if (s_length <= b_length) if (s_length <= b_length)
{ {
if (!s_length) if (!s_length)
return 0; // Empty string is always found {
if (nmatch)
{
match->beg= 0;
match->end= 0;
match->mblen= 0;
}
return 1; // Empty string is always found
}
big0= big;
end= big+b_length-s_length+1; end= big+b_length-s_length+1;
while (big < end) while (big < end)
...@@ -294,15 +304,28 @@ int my_instr_mb(CHARSET_INFO *cs, ...@@ -294,15 +304,28 @@ int my_instr_mb(CHARSET_INFO *cs,
if (!cs->coll->strnncoll(cs, (unsigned char*) big, s_length, if (!cs->coll->strnncoll(cs, (unsigned char*) big, s_length,
(unsigned char*) small, s_length)) (unsigned char*) small, s_length))
return res; {
if (nmatch)
{
match[0].beg= big0;
match[0].end= big-big0;
match[0].mblen= res;
if (nmatch > 1)
{
match[1].beg= match[0].end;
match[1].end= match[0].end+s_length;
match[1].mblen= 0; /* Not computed */
}
}
return 2;
}
mblen= (mblen= my_ismbchar(cs, big, end)) ? mblen : 1; mblen= (mblen= my_ismbchar(cs, big, end)) ? mblen : 1;
big+= mblen; big+= mblen;
b_length-= mblen; b_length-= mblen;
res++; res++;
} }
} }
return -1; return 0;
} }
/* BINARY collations handlers for MB charsets */ /* BINARY collations handlers for MB charsets */
......
...@@ -1030,16 +1030,25 @@ uint my_lengthsp_8bit(CHARSET_INFO *cs __attribute__((unused)), ...@@ -1030,16 +1030,25 @@ uint my_lengthsp_8bit(CHARSET_INFO *cs __attribute__((unused)),
} }
int my_instr_simple(CHARSET_INFO *cs, uint my_instr_simple(CHARSET_INFO *cs,
const char *big, uint b_length, const char *big, uint b_length,
const char *small, uint s_length) const char *small, uint s_length,
my_match_t *match, uint nmatch)
{ {
register const uchar *str, *search, *end, *search_end; register const uchar *str, *search, *end, *search_end;
if (s_length <= b_length) if (s_length <= b_length)
{ {
if (!s_length) if (!s_length)
return 0; // Empty string is always found {
if (nmatch)
{
match->beg= 0;
match->end= 0;
match->mblen= 0;
}
return 1; /* Empty string is always found */
}
str= (const uchar*) big; str= (const uchar*) big;
search= (const uchar*) small; search= (const uchar*) small;
...@@ -1060,11 +1069,24 @@ int my_instr_simple(CHARSET_INFO *cs, ...@@ -1060,11 +1069,24 @@ int my_instr_simple(CHARSET_INFO *cs,
if (cs->sort_order[*i++] != cs->sort_order[*j++]) if (cs->sort_order[*i++] != cs->sort_order[*j++])
goto skipp; goto skipp;
return (int) (str- (const uchar*)big) -1; if (nmatch > 0)
{
match[0].beg= 0;
match[0].end= str- (const uchar*)big-1;
match[0].mblen= match[0].end;
if (nmatch > 1)
{
match[1].beg= match[0].end;
match[1].end= match[0].end+s_length;
match[1].mblen= match[1].end-match[1].beg;
}
}
return 2;
} }
} }
} }
return -1; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment