/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

/* Written by Sergei A. Golubchik, who has a shared copyright to this code */

/* functions to work with full-text indices */

#include "ma_ftdefs.h"
#include <math.h>

unknown's avatar
unknown committed
23
void _ma_ft_segiterator_init(MARIA_HA *info, uint keynr, const uchar *record,
24 25 26 27 28 29 30 31 32 33
			     FT_SEG_ITERATOR *ftsi)
{
  DBUG_ENTER("_ma_ft_segiterator_init");

  ftsi->num=info->s->keyinfo[keynr].keysegs;
  ftsi->seg=info->s->keyinfo[keynr].seg;
  ftsi->rec=record;
  DBUG_VOID_RETURN;
}

unknown's avatar
unknown committed
34
void _ma_ft_segiterator_dummy_init(const uchar *record, uint len,
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
				   FT_SEG_ITERATOR *ftsi)
{
  DBUG_ENTER("_ma_ft_segiterator_dummy_init");

  ftsi->num=1;
  ftsi->seg=0;
  ftsi->pos=record;
  ftsi->len=len;
  DBUG_VOID_RETURN;
}

/*
  Advances the iterator by one segment.

  Unlike most functions here this one does NOT follow the
  "return 0 on success" convention, so that it can be used as

     while(_ma_ft_segiterator())

  Returns 1 when a segment was produced ("OK") and 0 when there is
  nothing left ("EOF").

  After a successful step ftsi->pos / ftsi->len describe the segment data;
  ftsi->pos is 0 when the segment's null bit is set in the record.
  For an iterator with a null segment pointer (see
  _ma_ft_segiterator_dummy_init()) pos and len are left untouched.
*/

uint _ma_ft_segiterator(register FT_SEG_ITERATOR *ftsi)
{
  DBUG_ENTER("_ma_ft_segiterator");

  if (ftsi->num == 0)
    DBUG_RETURN(0);                             /* EOF */
  ftsi->num--;

  if (ftsi->seg == 0)
    DBUG_RETURN(1);                             /* dummy: pos/len preset */

  /* Step the segment pointer back by one before using it */
  ftsi->seg--;

  if (ftsi->seg->null_bit &&
      (ftsi->rec[ftsi->seg->null_pos] & ftsi->seg->null_bit))
  {
    /* Null bit set in the record: report "no data" for this segment */
    ftsi->pos= 0;
  }
  else
  {
    ftsi->pos= ftsi->rec + ftsi->seg->start;

    if (ftsi->seg->flag & HA_VAR_LENGTH_PART)
    {
      /* Data is preceded by a 1 or 2 byte length prefix */
      uint prefix_bytes= ftsi->seg->bit_start;

      if (prefix_bytes == 1)
        ftsi->len= (uint) *(uchar*) ftsi->pos;
      else
        ftsi->len= uint2korr(ftsi->pos);
      ftsi->pos+= prefix_bytes;                 /* Skip VARCHAR length */
    }
    else if (ftsi->seg->flag & HA_BLOB_PART)
    {
      /*
        The record holds a length field of bit_start bytes followed by
        a pointer to the data; replace pos with that pointer.
      */
      ftsi->len= _ma_calc_blob_length(ftsi->seg->bit_start, ftsi->pos);
      memcpy_fixed((char*) &ftsi->pos, ftsi->pos + ftsi->seg->bit_start,
                   sizeof(char*));
    }
    else
    {
      ftsi->len= ftsi->seg->length;             /* Fixed length segment */
    }
  }
  DBUG_RETURN(1);
}


/* parses a document i.e. calls maria_ft_parse for every keyseg */

unknown's avatar
unknown committed
97
uint _ma_ft_parse(TREE *parsed, MARIA_HA *info, uint keynr, const uchar *record,
98
                  MYSQL_FTPARSER_PARAM *param, MEM_ROOT *mem_root)
99 100 101 102 103 104 105 106 107 108 109 110
{
  FT_SEG_ITERATOR ftsi;
  struct st_mysql_ftparser *parser;
  DBUG_ENTER("_ma_ft_parse");

  _ma_ft_segiterator_init(info, keynr, record, &ftsi);

  maria_ft_parse_init(parsed, info->s->keyinfo[keynr].seg->charset);
  parser= info->s->keyinfo[keynr].parser;
  while (_ma_ft_segiterator(&ftsi))
  {
    if (ftsi.pos)
unknown's avatar
unknown committed
111
      if (maria_ft_parse(parsed, (uchar *)ftsi.pos, ftsi.len, parser, param,
112
                         mem_root))
113 114 115 116 117
        DBUG_RETURN(1);
  }
  DBUG_RETURN(0);
}

unknown's avatar
unknown committed
118
FT_WORD * _ma_ft_parserecord(MARIA_HA *info, uint keynr, const uchar *record,
119
                             MEM_ROOT *mem_root)
120 121 122 123
{
  TREE ptree;
  MYSQL_FTPARSER_PARAM *param;
  DBUG_ENTER("_ma_ft_parserecord");
124
  if (! (param= maria_ftparser_call_initializer(info, keynr, 0)))
125 126
    DBUG_RETURN(NULL);
  bzero((char*) &ptree, sizeof(ptree));
127 128
  param->flags= 0;
  if (_ma_ft_parse(&ptree, info, keynr, record, param, mem_root))
129 130
    DBUG_RETURN(NULL);

131
  DBUG_RETURN(maria_ft_linearize(&ptree, mem_root));
132 133
}

unknown's avatar
unknown committed
134
static int _ma_ft_store(MARIA_HA *info, uint keynr, uchar *keybuf,
135 136 137 138 139 140 141 142
			FT_WORD *wlist, my_off_t filepos)
{
  uint key_length;
  DBUG_ENTER("_ma_ft_store");

  for (; wlist->pos; wlist++)
  {
    key_length= _ma_ft_make_key(info,keynr,keybuf,wlist,filepos);
unknown's avatar
unknown committed
143
    if (_ma_ck_write(info, keynr, keybuf, key_length))
144 145 146 147 148
      DBUG_RETURN(1);
   }
   DBUG_RETURN(0);
}

unknown's avatar
unknown committed
149
static int _ma_ft_erase(MARIA_HA *info, uint keynr, uchar *keybuf,
150 151 152 153 154 155 156 157
			FT_WORD *wlist, my_off_t filepos)
{
  uint key_length, err=0;
  DBUG_ENTER("_ma_ft_erase");

  for (; wlist->pos; wlist++)
  {
    key_length= _ma_ft_make_key(info,keynr,keybuf,wlist,filepos);
unknown's avatar
unknown committed
158
    if (_ma_ck_delete(info, keynr, keybuf, key_length))
159 160 161 162 163 164 165 166 167 168 169 170 171
      err=1;
   }
   DBUG_RETURN(err);
}

/*
  Compares an appropriate parts of two WORD_KEY keys directly out of records
  returns 1 if they are different
*/

#define THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT 1
#define GEE_THEY_ARE_ABSOLUTELY_IDENTICAL	 0

unknown's avatar
unknown committed
172
int _ma_ft_cmp(MARIA_HA *info, uint keynr, const uchar *rec1, const uchar *rec2)
173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194
{
  FT_SEG_ITERATOR ftsi1, ftsi2;
  CHARSET_INFO *cs=info->s->keyinfo[keynr].seg->charset;
  DBUG_ENTER("_ma_ft_cmp");

  _ma_ft_segiterator_init(info, keynr, rec1, &ftsi1);
  _ma_ft_segiterator_init(info, keynr, rec2, &ftsi2);

  while (_ma_ft_segiterator(&ftsi1) && _ma_ft_segiterator(&ftsi2))
  {
    if ((ftsi1.pos != ftsi2.pos) &&
        (!ftsi1.pos || !ftsi2.pos ||
         ha_compare_text(cs, (uchar*) ftsi1.pos,ftsi1.len,
                         (uchar*) ftsi2.pos,ftsi2.len,0,0)))
      DBUG_RETURN(THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT);
  }
  DBUG_RETURN(GEE_THEY_ARE_ABSOLUTELY_IDENTICAL);
}


/* update a document entry */

unknown's avatar
unknown committed
195 196
int _ma_ft_update(MARIA_HA *info, uint keynr, uchar *keybuf,
                  const uchar *oldrec, const uchar *newrec, my_off_t pos)
197 198 199 200 201 202 203 204
{
  int error= -1;
  FT_WORD *oldlist,*newlist, *old_word, *new_word;
  CHARSET_INFO *cs=info->s->keyinfo[keynr].seg->charset;
  uint key_length;
  int cmp, cmp2;
  DBUG_ENTER("_ma_ft_update");

205 206 207 208 209
  if (!(old_word=oldlist=_ma_ft_parserecord(info, keynr, oldrec,
                                            &info->ft_memroot)) ||
      !(new_word=newlist=_ma_ft_parserecord(info, keynr, newrec,
                                            &info->ft_memroot)))
    goto err;
210 211 212 213 214 215 216 217 218 219 220

  error=0;
  while(old_word->pos && new_word->pos)
  {
    cmp= ha_compare_text(cs, (uchar*) old_word->pos,old_word->len,
                             (uchar*) new_word->pos,new_word->len,0,0);
    cmp2= cmp ? 0 : (fabs(old_word->weight - new_word->weight) > 1.e-5);

    if (cmp < 0 || cmp2)
    {
      key_length= _ma_ft_make_key(info,keynr,keybuf,old_word,pos);
unknown's avatar
unknown committed
221
      if ((error= _ma_ck_delete(info,keynr, keybuf,key_length)))
222
        goto err;
223 224 225
    }
    if (cmp > 0 || cmp2)
    {
unknown's avatar
unknown committed
226 227
      key_length= _ma_ft_make_key(info, keynr, keybuf, new_word,pos);
      if ((error= _ma_ck_write(info, keynr, keybuf,key_length)))
228
        goto err;
229 230 231 232 233 234 235 236 237
    }
    if (cmp<=0) old_word++;
    if (cmp>=0) new_word++;
 }
 if (old_word->pos)
   error= _ma_ft_erase(info,keynr,keybuf,old_word,pos);
 else if (new_word->pos)
   error= _ma_ft_store(info,keynr,keybuf,new_word,pos);

238 239
err:
  free_root(&info->ft_memroot, MYF(MY_MARK_BLOCKS_FREE));
240 241 242 243 244 245
  DBUG_RETURN(error);
}


/* adds a document to the collection */

unknown's avatar
unknown committed
246
int _ma_ft_add(MARIA_HA *info, uint keynr, uchar *keybuf, const uchar *record,
247 248 249 250 251
	       my_off_t pos)
{
  int error= -1;
  FT_WORD *wlist;
  DBUG_ENTER("_ma_ft_add");
252
  DBUG_PRINT("enter",("keynr: %d",keynr));
253

254
  if ((wlist= _ma_ft_parserecord(info, keynr, record, &info->ft_memroot)))
255
    error= _ma_ft_store(info,keynr,keybuf,wlist,pos);
256 257
  free_root(&info->ft_memroot, MYF(MY_MARK_BLOCKS_FREE));
  DBUG_PRINT("exit",("Return: %d",error));
258 259 260 261 262 263
  DBUG_RETURN(error);
}


/* removes a document from the collection */

unknown's avatar
unknown committed
264
int _ma_ft_del(MARIA_HA *info, uint keynr, uchar *keybuf, const uchar *record,
265 266 267 268 269 270 271
	       my_off_t pos)
{
  int error= -1;
  FT_WORD *wlist;
  DBUG_ENTER("_ma_ft_del");
  DBUG_PRINT("enter",("keynr: %d",keynr));

272
  if ((wlist= _ma_ft_parserecord(info, keynr, record, &info->ft_memroot)))
273
    error= _ma_ft_erase(info,keynr,keybuf,wlist,pos);
274
  free_root(&info->ft_memroot, MYF(MY_MARK_BLOCKS_FREE));
275 276 277 278
  DBUG_PRINT("exit",("Return: %d",error));
  DBUG_RETURN(error);
}

unknown's avatar
unknown committed
279

unknown's avatar
unknown committed
280
uint _ma_ft_make_key(MARIA_HA *info, uint keynr, uchar *keybuf, FT_WORD *wptr,
unknown's avatar
unknown committed
281
                     my_off_t filepos)
282
{
unknown's avatar
unknown committed
283
  uchar buf[HA_FT_MAXBYTELEN+16];
284 285 286 287 288 289 290 291 292 293 294 295 296
  DBUG_ENTER("_ma_ft_make_key");

#if HA_FT_WTYPE == HA_KEYTYPE_FLOAT
  {
    float weight=(float) ((filepos==HA_OFFSET_ERROR) ? 0 : wptr->weight);
    mi_float4store(buf,weight);
  }
#else
#error
#endif

  int2store(buf+HA_FT_WLEN,wptr->len);
  memcpy(buf+HA_FT_WLEN+2,wptr->pos,wptr->len);
unknown's avatar
unknown committed
297
  DBUG_RETURN(_ma_make_key(info, keynr, keybuf, buf, filepos));
298 299 300 301 302 303 304
}


/*
  convert key value to ft2
*/

unknown's avatar
unknown committed
305
uint _ma_ft_convert_to_ft2(MARIA_HA *info, uint keynr, uchar *key)
306 307 308 309
{
  my_off_t root;
  DYNAMIC_ARRAY *da=info->ft1_to_ft2;
  MARIA_KEYDEF *keyinfo=&info->s->ft2_keyinfo;
unknown's avatar
unknown committed
310
  uchar *key_ptr= (uchar*) dynamic_array_ptr(da, 0), *end;
311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331
  uint length, key_length;
  DBUG_ENTER("_ma_ft_convert_to_ft2");

  /* we'll generate one pageful at once, and insert the rest one-by-one */
  /* calculating the length of this page ...*/
  length=(keyinfo->block_length-2) / keyinfo->keylength;
  set_if_smaller(length, da->elements);
  length=length * keyinfo->keylength;

  get_key_full_length_rdonly(key_length, key);
  while (_ma_ck_delete(info, keynr, key, key_length) == 0)
  {
    /*
      nothing to do here.
      _ma_ck_delete() will populate info->ft1_to_ft2 with deleted keys
     */
  }

  /* creating pageful of keys */
  maria_putint(info->buff,length+2,0);
  memcpy(info->buff+2, key_ptr, length);
332
  info->keyread_buff_used=info->page_changed=1;           /* info->buff is used */
333 334 335 336 337
  if ((root= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR ||
      _ma_write_keypage(info,keyinfo,root,DFLT_INIT_HITS,info->buff))
    DBUG_RETURN(-1);

  /* inserting the rest of key values */
unknown's avatar
unknown committed
338
  end= (uchar*) dynamic_array_ptr(da, da->elements);
339 340 341 342 343 344 345 346 347 348 349 350 351 352
  for (key_ptr+=length; key_ptr < end; key_ptr+=keyinfo->keylength)
    if(_ma_ck_real_write_btree(info, keyinfo, key_ptr, 0, &root, SEARCH_SAME))
      DBUG_RETURN(-1);

  /* now, writing the word key entry */
  ft_intXstore(key+key_length, - (int) da->elements);
  _ma_dpointer(info, key+key_length+HA_FT_WLEN, root);

  DBUG_RETURN(_ma_ck_real_write_btree(info,
                                     info->s->keyinfo+keynr,
                                     key, 0,
                                     &info->s->state.key_root[keynr],
                                     SEARCH_SAME));
}