Commit 9a057fd0 authored by serg@serg.mylan's avatar serg@serg.mylan

ft1->ft2 auto-conversion on INSERT (WL#725)

parent 630eb4ae
......@@ -21,13 +21,6 @@
#include "ftdefs.h"
#include <math.h>
/**************************************************************
This is to make ft-code to ignore keyseg.length at all *
and to index the whole VARCHAR/BLOB instead... */
#undef set_if_smaller
#define set_if_smaller(A,B) /* no op */
/**************************************************************/
void _mi_ft_segiterator_init(MI_INFO *info, uint keynr, const byte *record,
FT_SEG_ITERATOR *ftsi)
{
......@@ -88,7 +81,6 @@ uint _mi_ft_segiterator(register FT_SEG_ITERATOR *ftsi)
{
ftsi->len=uint2korr(ftsi->pos);
ftsi->pos+=2; /* Skip VARCHAR length */
set_if_smaller(ftsi->len,ftsi->seg->length);
DBUG_RETURN(1);
}
if (ftsi->seg->flag & HA_BLOB_PART)
......@@ -96,7 +88,6 @@ uint _mi_ft_segiterator(register FT_SEG_ITERATOR *ftsi)
ftsi->len=_mi_calc_blob_length(ftsi->seg->bit_start,ftsi->pos);
memcpy_fixed((char*) &ftsi->pos, ftsi->pos+ftsi->seg->bit_start,
sizeof(char*));
set_if_smaller(ftsi->len,ftsi->seg->length);
DBUG_RETURN(1);
}
ftsi->len=ftsi->seg->length;
......@@ -305,3 +296,53 @@ uint _ft_make_key(MI_INFO *info, uint keynr, byte *keybuf, FT_WORD *wptr,
memcpy(buf+HA_FT_WLEN+2,wptr->pos,wptr->len);
DBUG_RETURN(_mi_make_key(info,keynr,(uchar*) keybuf,buf,filepos));
}
/*
convert key value to ft2
*/
uint _mi_ft_convert_to_ft2(MI_INFO *info, uint keynr, uchar *key)
{
my_off_t root;
DYNAMIC_ARRAY *da=info->ft1_to_ft2;
MI_KEYDEF *keyinfo=&info->s->ft2_keyinfo;
uchar *key_ptr=dynamic_array_ptr(da, 0), *end;
uint length, key_length;
DBUG_ENTER("_mi_ft_convert_to_ft2");
/* we'll generate one pageful at once, and insert the rest one-by-one */
/* calculating the length of this page ...*/
length=(keyinfo->block_length-2) / keyinfo->keylength;
set_if_smaller(length, da->elements);
length=length * keyinfo->keylength;
get_key_full_length_rdonly(key_length, key);
while (_mi_ck_delete(info, keynr, key, key_length) == 0)
/* nothing to do here.
_mi_ck_delete() will populate info->ft1_to_ft2 with deleted keys
*/;
/* creating pageful of keys */
mi_putint(info->buff,length+2,0);
memcpy(info->buff+2, key_ptr, length);
info->buff_used=info->page_changed=1; /* info->buff is used */
if ((root= _mi_new(info,keyinfo)) == HA_OFFSET_ERROR ||
_mi_write_keypage(info,keyinfo,root,info->buff))
DBUG_RETURN(-1);
/* inserting the rest of key values */
end=dynamic_array_ptr(da, da->elements);
for (key_ptr+=length; key_ptr < end; key_ptr+=keyinfo->keylength)
if(_mi_ck_real_write_btree(info, keyinfo, key_ptr, 0, &root, SEARCH_SAME))
DBUG_RETURN(-1);
/* now, writing the word key entry */
ft_intXstore(key+key_length, -da->elements);
_mi_dpointer(info, key+key_length+HA_FT_WLEN, root);
DBUG_RETURN(_mi_ck_real_write_btree(info,
info->s->keyinfo+keynr,
key, 0,
&info->s->state.key_root[keynr],
SEARCH_SAME));
}
......@@ -34,3 +34,5 @@ int _mi_ft_cmp(MI_INFO *, uint, const byte *, const byte *);
int _mi_ft_add(MI_INFO *, uint, byte *, const byte *, my_off_t);
int _mi_ft_del(MI_INFO *, uint, byte *, const byte *, my_off_t);
uint _mi_ft_convert_to_ft2(MI_INFO *, uint, uchar *);
......@@ -18,6 +18,7 @@
#include "fulltext.h"
#include "rt_index.h"
#include <assert.h>
#ifdef __WIN__
#include <errno.h>
......@@ -231,13 +232,22 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo,
get_key_full_length_rdonly(off, lastkey);
subkeys=ft_sintXkorr(lastkey+off);
DBUG_ASSERT(info->ft1_to_ft2==0 || subkeys >=0);
comp_flag=SEARCH_SAME;
if (subkeys >= 0)
{
/* normal word, one-level tree structure */
DBUG_PRINT("info",("FT1"));
if (info->ft1_to_ft2)
{
/* we're in ft1->ft2 conversion mode. Saving key data */
insert_dynamic(info->ft1_to_ft2, lastkey+off);
}
else
{
/* we need exact match only if not in ft1->ft2 conversion mode */
flag=(*keyinfo->bin_search)(info,keyinfo,anc_buff,key,USE_WHOLE_KEY,
comp_flag, &keypos, lastkey, &last_key);
}
/* fall through to normal delete */
}
else
......@@ -252,13 +262,11 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo,
if (subkeys == -1)
{
/* the last entry in sub-tree */
DBUG_PRINT("info",("FT2: the last entry"));
_mi_dispose(info, keyinfo, root);
/* fall through to normal delete */
}
else
{
DBUG_PRINT("info",("FT2: going down"));
keyinfo=&info->s->ft2_keyinfo;
kpos-=keyinfo->keylength+nod_flag; /* we'll modify key entry 'in vivo' */
key+=off;
......
......@@ -536,6 +536,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
info.lock_type=F_UNLCK;
info.quick_mode=0;
info.bulk_insert=0;
info.ft1_to_ft2=0;
info.errkey= -1;
info.page_changed=1;
pthread_mutex_lock(&share->intern_lock);
......
......@@ -18,6 +18,7 @@
#include "fulltext.h"
#include "rt_index.h"
#include <assert.h>
#ifdef __WIN__
#include <errno.h>
......@@ -264,13 +265,32 @@ int _mi_ck_write_btree(register MI_INFO *info, uint keynr, uchar *key,
else
comp_flag=SEARCH_SAME; /* Keys in rec-pos order */
error=_mi_ck_real_write_btree(info, keyinfo, key, key_length,
root, comp_flag);
if (info->ft1_to_ft2)
{
if (!error)
error= _mi_ft_convert_to_ft2(info, keynr, key);
delete_dynamic(info->ft1_to_ft2);
my_free(info->ft1_to_ft2, MYF(0));
info->ft1_to_ft2=0;
}
DBUG_RETURN(error);
} /* _mi_ck_write_btree */
int _mi_ck_real_write_btree(MI_INFO *info, MI_KEYDEF *keyinfo,
uchar *key, uint key_length, my_off_t *root, uint comp_flag)
{
int error;
DBUG_ENTER("_mi_ck_real_write_btree");
/* key_length parameter is used only if comp_flag is SEARCH_FIND */
if (*root == HA_OFFSET_ERROR ||
(error=w_search(info, keyinfo, comp_flag, key, key_length,
*root, (uchar *) 0, (uchar*) 0,
(my_off_t) 0, 1)) > 0)
error=_mi_enlarge_root(info,keyinfo,key,root);
DBUG_RETURN(error);
} /* _mi_ck_write_btree */
} /* _mi_ck_real_write_btree */
/* Make a new root with key as only pointer */
......@@ -359,13 +379,11 @@ static int w_search(register MI_INFO *info, register MI_KEYDEF *keyinfo,
keyinfo=&info->s->ft2_keyinfo;
key+=off;
keypos-=keyinfo->keylength; /* we'll modify key entry 'in vivo' */
if ((error=w_search(info, keyinfo, comp_flag, key, HA_FT_WLEN, root,
(uchar *) 0, (uchar*) 0, (my_off_t) 0, 1)) > 0)
{
error=_mi_enlarge_root(info, keyinfo, key, &root);
error=_mi_ck_real_write_btree(info, keyinfo, key, 0,
&root, comp_flag);
_mi_dpointer(info, keypos+HA_FT_WLEN, root);
}
subkeys--; /* should there be underflow protection ? */
DBUG_ASSERT(subkeys < 0);
ft_intXstore(keypos, subkeys);
if (!error)
error=_mi_write_keypage(info,keyinfo,page,temp_buff);
......@@ -410,7 +428,6 @@ int _mi_insert(register MI_INFO *info, register MI_KEYDEF *keyinfo,
uchar *key, uchar *anc_buff, uchar *key_pos, uchar *key_buff,
uchar *father_buff, uchar *father_key_pos, my_off_t father_page,
my_bool insert_last)
{
uint a_length,nod_flag;
int t_length;
......@@ -464,8 +481,56 @@ int _mi_insert(register MI_INFO *info, register MI_KEYDEF *keyinfo,
a_length+=t_length;
mi_putint(anc_buff,a_length,nod_flag);
if (a_length <= keyinfo->block_length)
DBUG_RETURN(0); /* There is room on page */
{
if (keyinfo->block_length - a_length < 32 &&
keyinfo->flag & HA_FULLTEXT && key_pos == endpos &&
info->s->base.key_reflength <= info->s->base.rec_reflength &&
info->s->options & (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD))
{
/*
Normal word. One-level tree. Page is almost full.
Let's consider converting.
We'll compare 'key' and the first key at anc_buff
*/
uchar *a=key, *b=anc_buff+2+nod_flag;
uint alen, blen, ft2len=info->s->ft2_keyinfo.keylength;
/* the very first key on the page is always unpacked */
DBUG_ASSERT((*b & 128) == 0);
#if HA_FT_MAXLEN >= 127
blen= mi_uint2korr(b); b+=2;
#else
blen= *b++;
#endif
get_key_length(alen,a);
DBUG_ASSERT(info->ft1_to_ft2==0);
if (alen == blen &&
mi_compare_text(keyinfo->seg->charset, a, alen, b, blen, 0)==0)
{
/* yup. converting */
info->ft1_to_ft2=(DYNAMIC_ARRAY *)
my_malloc(sizeof(DYNAMIC_ARRAY), MYF(MY_WME));
my_init_dynamic_array(info->ft1_to_ft2, ft2len, 300, 50);
/*
now, adding all keys from the page to dynarray
if the page is a leaf (if not keys will be deleted later)
*/
if (!nod_flag)
{
/* let's leave the first key on the page, though, because
we cannot easily dispatch an empty page here */
b+=blen+ft2len+2;
for (a=anc_buff+a_length ; b < a ; b+=ft2len+2)
insert_dynamic(info->ft1_to_ft2, b);
/* fixing the page's length - it contains only one key now */
mi_putint(anc_buff,2+blen+ft2len+2,0);
}
/* the rest will be done when we're back from recursion */
}
}
DBUG_RETURN(0); /* There is room on page */
}
/* Page is full */
if (nod_flag)
insert_last=0;
......
......@@ -223,6 +223,7 @@ struct st_myisam_info {
MI_BIT_BUFF bit_buff;
/* accumulate indexfile changes between write's */
TREE *bulk_insert;
DYNAMIC_ARRAY *ft1_to_ft2; /* used only in ft1->ft2 conversion */
char *filename; /* parameter to open filename */
uchar *buff, /* Temp area for key */
*lastkey,*lastkey2; /* Last used search key */
......@@ -464,6 +465,9 @@ extern int _mi_delete_static_record(MI_INFO *info);
extern int _mi_cmp_static_record(MI_INFO *info,const byte *record);
extern int _mi_read_rnd_static_record(MI_INFO*, byte *,my_off_t, my_bool);
extern int _mi_ck_write(MI_INFO *info,uint keynr,uchar *key,uint length);
extern int _mi_ck_real_write_btree(MI_INFO *info, MI_KEYDEF *keyinfo,
uchar *key, uint key_length,
my_off_t *root, uint comp_flag);
extern int _mi_enlarge_root(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key, my_off_t *root);
extern int _mi_insert(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key,
uchar *anc_buff,uchar *key_pos,uchar *key_buff,
......
......@@ -103,4 +103,106 @@ count(*)
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
count(*)
262
DROP TABLE IF EXISTS t1;
drop table t1;
CREATE TABLE t1 (
i int(10) unsigned not null auto_increment primary key,
a varchar(255) not null,
FULLTEXT KEY (a)
) TYPE=MyISAM;
select count(*) from t1 where match a against ('aaaxxx');
count(*)
260
select count(*) from t1 where match a against ('aaayyy');
count(*)
250
select count(*) from t1 where match a against ('aaazzz');
count(*)
255
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
count(*)
260
select count(*) from t1 where match a against ('aaayyy' in boolean mode);
count(*)
250
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
count(*)
255
select count(*) from t1 where match a against ('aaaxxx aaayyy aaazzz');
count(*)
765
select count(*) from t1 where match a against ('aaaxxx aaayyy aaazzz' in boolean mode);
count(*)
765
select count(*) from t1 where match a against ('aaax*' in boolean mode);
count(*)
260
select count(*) from t1 where match a against ('aaay*' in boolean mode);
count(*)
250
select count(*) from t1 where match a against ('aaa*' in boolean mode);
count(*)
765
insert t1 (a) values ('aaaxxx'),('aaayyy');
insert t1 (a) values ('aaazzz'),('aaazzz'),('aaazzz'),('aaazzz'),('aaazzz');
select count(*) from t1 where match a against ('aaaxxx');
count(*)
261
select count(*) from t1 where match a against ('aaayyy');
count(*)
251
select count(*) from t1 where match a against ('aaazzz');
count(*)
260
insert t1 (a) values ('aaaxxx 000000');
select count(*) from t1 where match a against ('000000');
count(*)
1
delete from t1 where match a against ('000000');
select count(*) from t1 where match a against ('000000');
count(*)
0
select count(*) from t1 where match a against ('aaaxxx');
count(*)
261
delete from t1 where match a against ('aaazzz');
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
count(*)
261
select count(*) from t1 where match a against ('aaayyy' in boolean mode);
count(*)
251
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
count(*)
0
select count(*) from t1 where a = 'aaaxxx';
count(*)
261
select count(*) from t1 where a = 'aaayyy';
count(*)
251
select count(*) from t1 where a = 'aaazzz';
count(*)
0
insert t1 (a) values ('aaaxxx 000000');
select count(*) from t1 where match a against ('000000');
count(*)
1
update t1 set a='aaazzz' where match a against ('000000');
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
count(*)
261
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
count(*)
1
update t1 set a='aaazzz' where a = 'aaaxxx';
update t1 set a='aaaxxx' where a = 'aaayyy';
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
count(*)
251
select count(*) from t1 where match a against ('aaayyy' in boolean mode);
count(*)
0
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
count(*)
262
drop table t1;
......@@ -94,5 +94,83 @@ select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
select count(*) from t1 where match a against ('aaayyy' in boolean mode);
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
DROP TABLE IF EXISTS t1;
drop table t1;
CREATE TABLE t1 (
i int(10) unsigned not null auto_increment primary key,
a varchar(255) not null,
FULLTEXT KEY (a)
) TYPE=MyISAM;
# two-level entry, second-level tree with depth 2
--disable_query_log
let $1=260;
while ($1)
{
eval insert t1 (a) values ('aaaxxx');
dec $1;
}
let $1=255;
while ($1)
{
eval insert t1 (a) values ('aaazzz');
dec $1;
}
let $1=250;
while ($1)
{
eval insert t1 (a) values ('aaayyy');
dec $1;
}
--enable_query_log
select count(*) from t1 where match a against ('aaaxxx');
select count(*) from t1 where match a against ('aaayyy');
select count(*) from t1 where match a against ('aaazzz');
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
select count(*) from t1 where match a against ('aaayyy' in boolean mode);
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
select count(*) from t1 where match a against ('aaaxxx aaayyy aaazzz');
select count(*) from t1 where match a against ('aaaxxx aaayyy aaazzz' in boolean mode);
select count(*) from t1 where match a against ('aaax*' in boolean mode);
select count(*) from t1 where match a against ('aaay*' in boolean mode);
select count(*) from t1 where match a against ('aaa*' in boolean mode);
# mi_write:
insert t1 (a) values ('aaaxxx'),('aaayyy');
insert t1 (a) values ('aaazzz'),('aaazzz'),('aaazzz'),('aaazzz'),('aaazzz');
select count(*) from t1 where match a against ('aaaxxx');
select count(*) from t1 where match a against ('aaayyy');
select count(*) from t1 where match a against ('aaazzz');
# mi_delete
insert t1 (a) values ('aaaxxx 000000');
select count(*) from t1 where match a against ('000000');
delete from t1 where match a against ('000000');
select count(*) from t1 where match a against ('000000');
select count(*) from t1 where match a against ('aaaxxx');
delete from t1 where match a against ('aaazzz');
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
select count(*) from t1 where match a against ('aaayyy' in boolean mode);
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
# double-check without index
select count(*) from t1 where a = 'aaaxxx';
select count(*) from t1 where a = 'aaayyy';
select count(*) from t1 where a = 'aaazzz';
# update
insert t1 (a) values ('aaaxxx 000000');
select count(*) from t1 where match a against ('000000');
update t1 set a='aaazzz' where match a against ('000000');
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
update t1 set a='aaazzz' where a = 'aaaxxx';
update t1 set a='aaaxxx' where a = 'aaayyy';
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
select count(*) from t1 where match a against ('aaayyy' in boolean mode);
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
drop table t1;
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment