Commit 9a057fd0 authored by serg@serg.mylan

ft1->ft2 auto-conversion on INSERT (WL#725)

parent 630eb4ae
......@@ -21,13 +21,6 @@
#include "ftdefs.h"
#include <math.h>
/**************************************************************
This is to make ft-code to ignore keyseg.length at all *
and to index the whole VARCHAR/BLOB instead... */
#undef set_if_smaller
#define set_if_smaller(A,B) /* no op */
/**************************************************************/
void _mi_ft_segiterator_init(MI_INFO *info, uint keynr, const byte *record,
FT_SEG_ITERATOR *ftsi)
{
......@@ -88,7 +81,6 @@ uint _mi_ft_segiterator(register FT_SEG_ITERATOR *ftsi)
{
ftsi->len=uint2korr(ftsi->pos);
ftsi->pos+=2; /* Skip VARCHAR length */
set_if_smaller(ftsi->len,ftsi->seg->length);
DBUG_RETURN(1);
}
if (ftsi->seg->flag & HA_BLOB_PART)
......@@ -96,7 +88,6 @@ uint _mi_ft_segiterator(register FT_SEG_ITERATOR *ftsi)
ftsi->len=_mi_calc_blob_length(ftsi->seg->bit_start,ftsi->pos);
memcpy_fixed((char*) &ftsi->pos, ftsi->pos+ftsi->seg->bit_start,
sizeof(char*));
set_if_smaller(ftsi->len,ftsi->seg->length);
DBUG_RETURN(1);
}
ftsi->len=ftsi->seg->length;
......@@ -305,3 +296,53 @@ uint _ft_make_key(MI_INFO *info, uint keynr, byte *keybuf, FT_WORD *wptr,
memcpy(buf+HA_FT_WLEN+2,wptr->pos,wptr->len);
DBUG_RETURN(_mi_make_key(info,keynr,(uchar*) keybuf,buf,filepos));
}
/*
  Convert a one-level (ft1) fulltext word entry into a two-level (ft2) one.

  SYNOPSIS
    _mi_ft_convert_to_ft2()
    info    MyISAM handler; info->ft1_to_ft2 must point to the dynamic
            array that collects the sub-keys of the word being converted
    keynr   number of the fulltext index
    key     the word key. The (negated) number of sub-keys and the pointer
            to the sub-tree root are stored into this buffer right behind
            the key's full length, so the buffer must have room for them

  DESCRIPTION
    1. All remaining ft1 entries for the word are deleted from the index;
       _mi_ck_delete() appends every deleted entry to info->ft1_to_ft2.
    2. The first pageful of collected entries is written as a new page,
       which becomes the root of the second-level tree.
    3. The remaining entries are inserted into that tree one by one.
    4. The word key, now carrying -(number of sub-keys) and the sub-tree
       root, is written back into the first-level tree.

  RETURN
    0     ok
    != 0  error. Note that the failure paths return -1 through an unsigned
          return type, so callers must test for non-zero, not for "< 0".
*/

uint _mi_ft_convert_to_ft2(MI_INFO *info, uint keynr, uchar *key)
{
  my_off_t root;
  DYNAMIC_ARRAY *da=info->ft1_to_ft2;
  MI_KEYDEF *keyinfo=&info->s->ft2_keyinfo;
  uchar *key_ptr, *end;
  uint length, key_length;
  DBUG_ENTER("_mi_ft_convert_to_ft2");

  /* we'll generate one pageful at once, and insert the rest one-by-one */
  /* calculating the length of this page ...*/
  length=(keyinfo->block_length-2) / keyinfo->keylength;
  set_if_smaller(length, da->elements);
  length=length * keyinfo->keylength;

  get_key_full_length_rdonly(key_length, key);
  while (_mi_ck_delete(info, keynr, key, key_length) == 0)
  {
    /*
      nothing to do here.
      _mi_ck_delete() will populate info->ft1_to_ft2 with deleted keys
    */
  }

  /*
    Take the pointer to the first collected key only now: the deletes
    above append to the array, and growing it may move its buffer. A
    pointer fetched before the loop could be stale at this point.
  */
  key_ptr=dynamic_array_ptr(da, 0);

  /* creating pageful of keys */
  mi_putint(info->buff,length+2,0);
  memcpy(info->buff+2, key_ptr, length);
  info->buff_used=info->page_changed=1; /* info->buff is used */
  if ((root= _mi_new(info,keyinfo)) == HA_OFFSET_ERROR ||
      _mi_write_keypage(info,keyinfo,root,info->buff))
    DBUG_RETURN(-1);

  /* inserting the rest of key values */
  end=dynamic_array_ptr(da, da->elements);
  for (key_ptr+=length; key_ptr < end; key_ptr+=keyinfo->keylength)
  {
    if (_mi_ck_real_write_btree(info, keyinfo, key_ptr, 0, &root, SEARCH_SAME))
      DBUG_RETURN(-1);
  }

  /* now, writing the word key entry */
  ft_intXstore(key+key_length, -da->elements);
  _mi_dpointer(info, key+key_length+HA_FT_WLEN, root);

  DBUG_RETURN(_mi_ck_real_write_btree(info,
                                      info->s->keyinfo+keynr,
                                      key, 0,
                                      &info->s->state.key_root[keynr],
                                      SEARCH_SAME));
}
......@@ -34,3 +34,5 @@ int _mi_ft_cmp(MI_INFO *, uint, const byte *, const byte *);
int _mi_ft_add(MI_INFO *, uint, byte *, const byte *, my_off_t);
int _mi_ft_del(MI_INFO *, uint, byte *, const byte *, my_off_t);
uint _mi_ft_convert_to_ft2(MI_INFO *, uint, uchar *);
......@@ -18,6 +18,7 @@
#include "fulltext.h"
#include "rt_index.h"
#include <assert.h>
#ifdef __WIN__
#include <errno.h>
......@@ -231,13 +232,22 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo,
get_key_full_length_rdonly(off, lastkey);
subkeys=ft_sintXkorr(lastkey+off);
DBUG_ASSERT(info->ft1_to_ft2==0 || subkeys >=0);
comp_flag=SEARCH_SAME;
if (subkeys >= 0)
{
/* normal word, one-level tree structure */
DBUG_PRINT("info",("FT1"));
flag=(*keyinfo->bin_search)(info,keyinfo,anc_buff,key,USE_WHOLE_KEY,
comp_flag, &keypos, lastkey, &last_key);
if (info->ft1_to_ft2)
{
/* we're in ft1->ft2 conversion mode. Saving key data */
insert_dynamic(info->ft1_to_ft2, lastkey+off);
}
else
{
/* we need exact match only if not in ft1->ft2 conversion mode */
flag=(*keyinfo->bin_search)(info,keyinfo,anc_buff,key,USE_WHOLE_KEY,
comp_flag, &keypos, lastkey, &last_key);
}
/* fall through to normal delete */
}
else
......@@ -252,13 +262,11 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo,
if (subkeys == -1)
{
/* the last entry in sub-tree */
DBUG_PRINT("info",("FT2: the last entry"));
_mi_dispose(info, keyinfo, root);
/* fall through to normal delete */
}
else
{
DBUG_PRINT("info",("FT2: going down"));
keyinfo=&info->s->ft2_keyinfo;
kpos-=keyinfo->keylength+nod_flag; /* we'll modify key entry 'in vivo' */
key+=off;
......
......@@ -513,8 +513,8 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
NullS))
goto err;
errpos=6;
if (!have_rtree)
if (!have_rtree)
info.rtree_recursion_state= NULL;
strmov(info.filename,org_name);
......@@ -536,6 +536,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
info.lock_type=F_UNLCK;
info.quick_mode=0;
info.bulk_insert=0;
info.ft1_to_ft2=0;
info.errkey= -1;
info.page_changed=1;
pthread_mutex_lock(&share->intern_lock);
......@@ -1112,7 +1113,7 @@ char *mi_recinfo_read(char *ptr, MI_COLUMNDEF *recinfo)
/**************************************************************************
Open data file with or without RAID
We can't use dup() here as the data file descriptors need to have different
We can't use dup() here as the data file descriptors need to have different
active seek-positions.
The argument file_to_dup is here for the future if there would on some OS
......
......@@ -18,6 +18,7 @@
#include "fulltext.h"
#include "rt_index.h"
#include <assert.h>
#ifdef __WIN__
#include <errno.h>
......@@ -124,7 +125,7 @@ int mi_write(MI_INFO *info, byte *record)
else
{
if (share->keyinfo[i].ck_insert(info,i,buff,
_mi_make_key(info,i,buff,record,filepos)))
_mi_make_key(info,i,buff,record,filepos)))
{
if (local_lock_tree)
rw_unlock(&share->key_root_lock[i]);
......@@ -264,13 +265,32 @@ int _mi_ck_write_btree(register MI_INFO *info, uint keynr, uchar *key,
else
comp_flag=SEARCH_SAME; /* Keys in rec-pos order */
error=_mi_ck_real_write_btree(info, keyinfo, key, key_length,
root, comp_flag);
if (info->ft1_to_ft2)
{
if (!error)
error= _mi_ft_convert_to_ft2(info, keynr, key);
delete_dynamic(info->ft1_to_ft2);
my_free(info->ft1_to_ft2, MYF(0));
info->ft1_to_ft2=0;
}
DBUG_RETURN(error);
} /* _mi_ck_write_btree */
/*
  Insert a key into the B-tree whose root page address is *root.

  info        MyISAM handler
  keyinfo     definition of the key (tree) to insert into
  key         key to insert
  key_length  passed through to w_search(); see the note inside the body
  root        in/out: address of the tree root; handed to
              _mi_enlarge_root() when a new root has to be made
  comp_flag   comparison flags passed through to w_search()

  Returns the result of w_search() when it is <= 0, otherwise the result
  of _mi_enlarge_root().
*/
int _mi_ck_real_write_btree(MI_INFO *info, MI_KEYDEF *keyinfo,
uchar *key, uint key_length, my_off_t *root, uint comp_flag)
{
int error;
DBUG_ENTER("_mi_ck_real_write_btree");
/* key_length parameter is used only if comp_flag is SEARCH_FIND */
/*
  If the tree has no root yet (*root == HA_OFFSET_ERROR), w_search() is
  skipped and a root is made directly. A positive w_search() result also
  leads to _mi_enlarge_root() being called with the key.
*/
if (*root == HA_OFFSET_ERROR ||
(error=w_search(info, keyinfo, comp_flag, key, key_length,
*root, (uchar *) 0, (uchar*) 0,
(my_off_t) 0, 1)) > 0)
error=_mi_enlarge_root(info,keyinfo,key,root);
DBUG_RETURN(error);
/*
  NOTE(review): the two closing-brace lines below look like the old and
  the new version of a single line from the diff - confirm that only the
  second one belongs to this function.
*/
} /* _mi_ck_write_btree */
} /* _mi_ck_real_write_btree */
/* Make a new root with key as only pointer */
......@@ -359,13 +379,11 @@ static int w_search(register MI_INFO *info, register MI_KEYDEF *keyinfo,
keyinfo=&info->s->ft2_keyinfo;
key+=off;
keypos-=keyinfo->keylength; /* we'll modify key entry 'in vivo' */
if ((error=w_search(info, keyinfo, comp_flag, key, HA_FT_WLEN, root,
(uchar *) 0, (uchar*) 0, (my_off_t) 0, 1)) > 0)
{
error=_mi_enlarge_root(info, keyinfo, key, &root);
_mi_dpointer(info, keypos+HA_FT_WLEN, root);
}
error=_mi_ck_real_write_btree(info, keyinfo, key, 0,
&root, comp_flag);
_mi_dpointer(info, keypos+HA_FT_WLEN, root);
subkeys--; /* should there be underflow protection ? */
DBUG_ASSERT(subkeys < 0);
ft_intXstore(keypos, subkeys);
if (!error)
error=_mi_write_keypage(info,keyinfo,page,temp_buff);
......@@ -410,7 +428,6 @@ int _mi_insert(register MI_INFO *info, register MI_KEYDEF *keyinfo,
uchar *key, uchar *anc_buff, uchar *key_pos, uchar *key_buff,
uchar *father_buff, uchar *father_key_pos, my_off_t father_page,
my_bool insert_last)
{
uint a_length,nod_flag;
int t_length;
......@@ -464,8 +481,56 @@ int _mi_insert(register MI_INFO *info, register MI_KEYDEF *keyinfo,
a_length+=t_length;
mi_putint(anc_buff,a_length,nod_flag);
if (a_length <= keyinfo->block_length)
{
if (keyinfo->block_length - a_length < 32 &&
keyinfo->flag & HA_FULLTEXT && key_pos == endpos &&
info->s->base.key_reflength <= info->s->base.rec_reflength &&
info->s->options & (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD))
{
/*
Normal word. One-level tree. Page is almost full.
Let's consider converting.
We'll compare 'key' and the first key at anc_buff
*/
uchar *a=key, *b=anc_buff+2+nod_flag;
uint alen, blen, ft2len=info->s->ft2_keyinfo.keylength;
/* the very first key on the page is always unpacked */
DBUG_ASSERT((*b & 128) == 0);
#if HA_FT_MAXLEN >= 127
blen= mi_uint2korr(b); b+=2;
#else
blen= *b++;
#endif
get_key_length(alen,a);
DBUG_ASSERT(info->ft1_to_ft2==0);
if (alen == blen &&
mi_compare_text(keyinfo->seg->charset, a, alen, b, blen, 0)==0)
{
/* yup. converting */
info->ft1_to_ft2=(DYNAMIC_ARRAY *)
my_malloc(sizeof(DYNAMIC_ARRAY), MYF(MY_WME));
my_init_dynamic_array(info->ft1_to_ft2, ft2len, 300, 50);
/*
now, adding all keys from the page to dynarray
if the page is a leaf (if not keys will be deleted later)
*/
if (!nod_flag)
{
/* let's leave the first key on the page, though, because
we cannot easily dispatch an empty page here */
b+=blen+ft2len+2;
for (a=anc_buff+a_length ; b < a ; b+=ft2len+2)
insert_dynamic(info->ft1_to_ft2, b);
/* fixing the page's length - it contains only one key now */
mi_putint(anc_buff,2+blen+ft2len+2,0);
}
/* the rest will be done when we're back from recursion */
}
}
DBUG_RETURN(0); /* There is room on page */
}
/* Page is full */
if (nod_flag)
insert_last=0;
......
......@@ -222,7 +222,8 @@ struct st_myisam_info {
MI_BLOB *blobs; /* Pointer to blobs */
MI_BIT_BUFF bit_buff;
/* accumulate indexfile changes between write's */
TREE *bulk_insert;
TREE *bulk_insert;
DYNAMIC_ARRAY *ft1_to_ft2; /* used only in ft1->ft2 conversion */
char *filename; /* parameter to open filename */
uchar *buff, /* Temp area for key */
*lastkey,*lastkey2; /* Last used search key */
......@@ -464,6 +465,9 @@ extern int _mi_delete_static_record(MI_INFO *info);
extern int _mi_cmp_static_record(MI_INFO *info,const byte *record);
extern int _mi_read_rnd_static_record(MI_INFO*, byte *,my_off_t, my_bool);
extern int _mi_ck_write(MI_INFO *info,uint keynr,uchar *key,uint length);
extern int _mi_ck_real_write_btree(MI_INFO *info, MI_KEYDEF *keyinfo,
uchar *key, uint key_length,
my_off_t *root, uint comp_flag);
extern int _mi_enlarge_root(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key, my_off_t *root);
extern int _mi_insert(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key,
uchar *anc_buff,uchar *key_pos,uchar *key_buff,
......
......@@ -103,4 +103,106 @@ count(*)
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
count(*)
262
DROP TABLE IF EXISTS t1;
drop table t1;
CREATE TABLE t1 (
i int(10) unsigned not null auto_increment primary key,
a varchar(255) not null,
FULLTEXT KEY (a)
) TYPE=MyISAM;
select count(*) from t1 where match a against ('aaaxxx');
count(*)
260
select count(*) from t1 where match a against ('aaayyy');
count(*)
250
select count(*) from t1 where match a against ('aaazzz');
count(*)
255
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
count(*)
260
select count(*) from t1 where match a against ('aaayyy' in boolean mode);
count(*)
250
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
count(*)
255
select count(*) from t1 where match a against ('aaaxxx aaayyy aaazzz');
count(*)
765
select count(*) from t1 where match a against ('aaaxxx aaayyy aaazzz' in boolean mode);
count(*)
765
select count(*) from t1 where match a against ('aaax*' in boolean mode);
count(*)
260
select count(*) from t1 where match a against ('aaay*' in boolean mode);
count(*)
250
select count(*) from t1 where match a against ('aaa*' in boolean mode);
count(*)
765
insert t1 (a) values ('aaaxxx'),('aaayyy');
insert t1 (a) values ('aaazzz'),('aaazzz'),('aaazzz'),('aaazzz'),('aaazzz');
select count(*) from t1 where match a against ('aaaxxx');
count(*)
261
select count(*) from t1 where match a against ('aaayyy');
count(*)
251
select count(*) from t1 where match a against ('aaazzz');
count(*)
260
insert t1 (a) values ('aaaxxx 000000');
select count(*) from t1 where match a against ('000000');
count(*)
1
delete from t1 where match a against ('000000');
select count(*) from t1 where match a against ('000000');
count(*)
0
select count(*) from t1 where match a against ('aaaxxx');
count(*)
261
delete from t1 where match a against ('aaazzz');
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
count(*)
261
select count(*) from t1 where match a against ('aaayyy' in boolean mode);
count(*)
251
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
count(*)
0
select count(*) from t1 where a = 'aaaxxx';
count(*)
261
select count(*) from t1 where a = 'aaayyy';
count(*)
251
select count(*) from t1 where a = 'aaazzz';
count(*)
0
insert t1 (a) values ('aaaxxx 000000');
select count(*) from t1 where match a against ('000000');
count(*)
1
update t1 set a='aaazzz' where match a against ('000000');
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
count(*)
261
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
count(*)
1
update t1 set a='aaazzz' where a = 'aaaxxx';
update t1 set a='aaaxxx' where a = 'aaayyy';
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
count(*)
251
select count(*) from t1 where match a against ('aaayyy' in boolean mode);
count(*)
0
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
count(*)
262
drop table t1;
......@@ -94,5 +94,83 @@ select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
select count(*) from t1 where match a against ('aaayyy' in boolean mode);
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
DROP TABLE IF EXISTS t1;
drop table t1;
CREATE TABLE t1 (
i int(10) unsigned not null auto_increment primary key,
a varchar(255) not null,
FULLTEXT KEY (a)
) TYPE=MyISAM;
# two-level entry, second-level tree with depth 2
--disable_query_log
let $1=260;
while ($1)
{
eval insert t1 (a) values ('aaaxxx');
dec $1;
}
let $1=255;
while ($1)
{
eval insert t1 (a) values ('aaazzz');
dec $1;
}
let $1=250;
while ($1)
{
eval insert t1 (a) values ('aaayyy');
dec $1;
}
--enable_query_log
select count(*) from t1 where match a against ('aaaxxx');
select count(*) from t1 where match a against ('aaayyy');
select count(*) from t1 where match a against ('aaazzz');
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
select count(*) from t1 where match a against ('aaayyy' in boolean mode);
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
select count(*) from t1 where match a against ('aaaxxx aaayyy aaazzz');
select count(*) from t1 where match a against ('aaaxxx aaayyy aaazzz' in boolean mode);
select count(*) from t1 where match a against ('aaax*' in boolean mode);
select count(*) from t1 where match a against ('aaay*' in boolean mode);
select count(*) from t1 where match a against ('aaa*' in boolean mode);
# mi_write:
insert t1 (a) values ('aaaxxx'),('aaayyy');
insert t1 (a) values ('aaazzz'),('aaazzz'),('aaazzz'),('aaazzz'),('aaazzz');
select count(*) from t1 where match a against ('aaaxxx');
select count(*) from t1 where match a against ('aaayyy');
select count(*) from t1 where match a against ('aaazzz');
# mi_delete
insert t1 (a) values ('aaaxxx 000000');
select count(*) from t1 where match a against ('000000');
delete from t1 where match a against ('000000');
select count(*) from t1 where match a against ('000000');
select count(*) from t1 where match a against ('aaaxxx');
delete from t1 where match a against ('aaazzz');
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
select count(*) from t1 where match a against ('aaayyy' in boolean mode);
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
# double-check without index
select count(*) from t1 where a = 'aaaxxx';
select count(*) from t1 where a = 'aaayyy';
select count(*) from t1 where a = 'aaazzz';
# update
insert t1 (a) values ('aaaxxx 000000');
select count(*) from t1 where match a against ('000000');
update t1 set a='aaazzz' where match a against ('000000');
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
update t1 set a='aaazzz' where a = 'aaaxxx';
update t1 set a='aaaxxx' where a = 'aaayyy';
select count(*) from t1 where match a against ('aaaxxx' in boolean mode);
select count(*) from t1 where match a against ('aaayyy' in boolean mode);
select count(*) from t1 where match a against ('aaazzz' in boolean mode);
drop table t1;
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment