ft_update.c 7.57 KB
Newer Older
unknown's avatar
unknown committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

/* Written by Sergei A. Golubchik, who has a shared copyright to this code */

/* functions to work with full-text indices */

#include "ftdefs.h"
unknown's avatar
unknown committed
22
#include <math.h>
unknown's avatar
unknown committed
23 24 25 26 27 28 29 30

/**************************************************************
   Redefine set_if_smaller() as a no-op for the rest of this   *
   file, so that the full-text code ignores keyseg.length      *
   entirely and indexes the whole VARCHAR/BLOB instead.        */
#undef set_if_smaller
#define set_if_smaller(A,B)                          /* no op */
/**************************************************************/

unknown's avatar
unknown committed
31 32 33 34 35 36 37
/*
  Prepare a segment iterator for walking the segments of full-text key
  'keynr' over the row image 'record'.

  The iterator is set up to yield (keysegs - FT_SEGS) segments.  Note that
  _mi_ft_segiterator() steps ftsi->seg backwards before reading it, so the
  pointer stored here is a starting point, not the first segment visited.
*/
void _mi_ft_segiterator_init(MI_INFO *info, uint keynr, const byte *record,
                             FT_SEG_ITERATOR *ftsi)
{
  ftsi->rec= record;
  ftsi->seg= info->s->keyinfo[keynr].seg;
  ftsi->num= info->s->keyinfo[keynr].keysegs - FT_SEGS;
}
unknown's avatar
unknown committed
38

unknown's avatar
unknown committed
39 40
/*
  Prepare a segment iterator over a single caller-supplied buffer instead
  of the key segments of a record.

  With seg set to 0, _mi_ft_segiterator() reports 'record'/'len' unchanged
  exactly once and then signals the end of the iteration.
*/
void _mi_ft_segiterator_dummy_init(const byte *record, uint len,
                                   FT_SEG_ITERATOR *ftsi)
{
  ftsi->pos= record;
  ftsi->len= len;
  ftsi->seg= 0;
  ftsi->num= 1;
}

/* This function breaks convention "return 0 in success"
   but it's easier to use like this

      while(_mi_ft_segiterator())
unknown's avatar
unknown committed
52

unknown's avatar
unknown committed
53 54 55 56 57
   so "1" means "OK", "0" means "EOF"
*/

uint _mi_ft_segiterator(register FT_SEG_ITERATOR *ftsi)
{
58 59
  if (!ftsi->num) return 0; else ftsi->num--;
  if (!ftsi->seg) return 1; else ftsi->seg--;
unknown's avatar
unknown committed
60 61 62

  if (ftsi->seg->null_bit &&
      (ftsi->rec[ftsi->seg->null_pos] & ftsi->seg->null_bit))
unknown's avatar
unknown committed
63
  {
unknown's avatar
unknown committed
64
      ftsi->pos=0;
65
      return 1;
unknown's avatar
unknown committed
66
  }
unknown's avatar
unknown committed
67 68 69 70 71 72 73 74 75 76 77
  ftsi->pos= ftsi->rec+ftsi->seg->start;
  if (ftsi->seg->flag & HA_VAR_LENGTH)
  {
    ftsi->len=uint2korr(ftsi->pos);
    ftsi->pos+=2;					 /* Skip VARCHAR length */
    set_if_smaller(ftsi->len,ftsi->seg->length);
    return 1;
  }
  if (ftsi->seg->flag & HA_BLOB_PART)
  {
    ftsi->len=_mi_calc_blob_length(ftsi->seg->bit_start,ftsi->pos);
unknown's avatar
unknown committed
78 79
    memcpy_fixed((char*) &ftsi->pos, ftsi->pos+ftsi->seg->bit_start,
		 sizeof(char*));
unknown's avatar
unknown committed
80 81 82 83 84 85 86 87 88 89 90 91 92
    set_if_smaller(ftsi->len,ftsi->seg->length);
    return 1;
  }
  ftsi->len=ftsi->seg->length;
  return 1;
}

/*
  Parse a document: call ft_parse() for every key segment of full-text key
  'keynr' in 'record' whose data pointer is not 0, collecting the result
  in 'parsed'.

  RETURN
    0  every segment was parsed
    1  ft_parse() returned non-zero for some segment
*/
uint _mi_ft_parse(TREE *parsed, MI_INFO *info, uint keynr, const byte *record)
{
  FT_SEG_ITERATOR seg_iter;

  _mi_ft_segiterator_init(info, keynr, record, &seg_iter);
  ft_parse_init(parsed, info->s->keyinfo[keynr].seg->charset);

  while (_mi_ft_segiterator(&seg_iter))
  {
    if (!seg_iter.pos)
      continue;                                 /* nothing to parse here */
    if (ft_parse(parsed, (byte *) seg_iter.pos, seg_iter.len))
      return 1;
  }
  return 0;
}

unknown's avatar
unknown committed
102 103 104
/*
  Parse 'record' for full-text key 'keynr' into a zero-initialised tree and
  return the result of ft_linearize() on that tree.

  keybuf is not used.

  RETURN
    NULL if _mi_ft_parse() failed, otherwise whatever ft_linearize() returns.
    The callers in this file release a non-NULL result with my_free().
*/
FT_WORD * _mi_ft_parserecord(MI_INFO *info, uint keynr,
                             byte *keybuf __attribute__((unused)),
                             const byte *record)
{
  TREE word_tree;

  bzero((char*) &word_tree, sizeof(word_tree));
  if (!_mi_ft_parse(&word_tree, info, keynr, record))
    return ft_linearize(&word_tree);
  return NULL;
}

/*
  Write one index entry for every word in 'wlist', each built for row
  position 'filepos'.  The list ends at the first element whose pos is 0.

  RETURN
    0  all keys were written
    1  _mi_ck_write() failed; the remaining words are not written
*/
static int _mi_ft_store(MI_INFO *info, uint keynr, byte *keybuf,
                        FT_WORD *wlist, my_off_t filepos)
{
  FT_WORD *word;

  for (word= wlist; word->pos; word++)
  {
    uint key_length= _ft_make_key(info, keynr, keybuf, word, filepos);

    if (_mi_ck_write(info, keynr, (uchar*) keybuf, key_length))
      return 1;
  }
  return 0;
}

/*
  Delete the index entry of every word in 'wlist', each built for row
  position 'filepos'.  The list ends at the first element whose pos is 0.

  Unlike _mi_ft_store(), a failed delete does not stop the loop: all words
  are attempted and the failure is only remembered.

  RETURN
    0  all keys were deleted
    1  at least one _mi_ck_delete() call failed
*/
static int _mi_ft_erase(MI_INFO *info, uint keynr, byte *keybuf,
                        FT_WORD *wlist, my_off_t filepos)
{
  FT_WORD *word;
  int failed= 0;

  for (word= wlist; word->pos; word++)
  {
    uint key_length= _ft_make_key(info, keynr, keybuf, word, filepos);

    if (_mi_ck_delete(info, keynr, (uchar*) keybuf, key_length))
      failed= 1;
  }
  return failed;
}

/*
  Compare the parts of two records that belong to full-text key 'keynr',
  reading the segments directly out of the records.

  RETURN
    THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT (1)  some segment differs
    GEE_THEY_ARE_ABSOLUTELY_IDENTICAL (0)         no difference was found
*/

#define THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT 1
#define GEE_THEY_ARE_ABSOLUTELY_IDENTICAL	 0

int _mi_ft_cmp(MI_INFO *info, uint keynr, const byte *rec1, const byte *rec2)
{
  FT_SEG_ITERATOR iter1, iter2;
  CHARSET_INFO *cs= info->s->keyinfo[keynr].seg->charset;

  _mi_ft_segiterator_init(info, keynr, rec1, &iter1);
  _mi_ft_segiterator_init(info, keynr, rec2, &iter2);

  while (_mi_ft_segiterator(&iter1) && _mi_ft_segiterator(&iter2))
  {
    /* Same data pointer (this also covers both being 0): no difference */
    if (iter1.pos == iter2.pos)
      continue;

    /* Exactly one side has no data */
    if (!iter1.pos || !iter2.pos)
      return THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT;

    if (_mi_compare_text(cs, (uchar*) iter1.pos, iter1.len,
                             (uchar*) iter2.pos, iter2.len, 0))
      return THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT;
  }
  return GEE_THEY_ARE_ABSOLUTELY_IDENTICAL;
}

168
/* update a document entry */
169
int _mi_ft_update(MI_INFO *info, uint keynr, byte *keybuf,
170 171 172 173
                  const byte *oldrec, const byte *newrec, my_off_t pos)
{
  int error= -1;
  FT_WORD *oldlist,*newlist, *old_word, *new_word;
174
  CHARSET_INFO *cs=info->s->keyinfo[keynr].seg->charset;
175
  uint key_length;
unknown's avatar
unknown committed
176
  int cmp, cmp2;
177 178 179 180 181 182

  if (!(old_word=oldlist=_mi_ft_parserecord(info, keynr, keybuf, oldrec)))
    goto err0;
  if (!(new_word=newlist=_mi_ft_parserecord(info, keynr, keybuf, newrec)))
    goto err1;

unknown's avatar
unknown committed
183
  error=0;
184 185
  while(old_word->pos && new_word->pos)
  {
186 187
    cmp=_mi_compare_text(cs, (uchar*) old_word->pos,old_word->len,
                             (uchar*) new_word->pos,new_word->len,0);
unknown's avatar
unknown committed
188
    cmp2= cmp ? 0 : (fabs(old_word->weight - new_word->weight) > 1.e-5);
189

unknown's avatar
unknown committed
190 191
    if (cmp < 0 || cmp2)
    {
192
      key_length=_ft_make_key(info,keynr,keybuf,old_word,pos);
193
      if ((error=_mi_ck_delete(info,keynr,(uchar*) keybuf,key_length)))
194
        goto err2;
unknown's avatar
unknown committed
195 196 197
    }
    if (cmp > 0 || cmp2)
    {
198
      key_length=_ft_make_key(info,keynr,keybuf,new_word,pos);
199
      if ((error=_mi_ck_write(info,keynr,(uchar*) keybuf,key_length)))
200
        goto err2;
unknown's avatar
unknown committed
201 202 203
    }
    if (cmp<=0) old_word++;
    if (cmp>=0) new_word++;
204 205 206 207 208 209 210 211 212 213 214 215 216 217
 }
 if (old_word->pos)
   error=_mi_ft_erase(info,keynr,keybuf,old_word,pos);
 else if (new_word->pos)
   error=_mi_ft_store(info,keynr,keybuf,new_word,pos);

err2:
    my_free((char*) newlist,MYF(0));
err1:
    my_free((char*) oldlist,MYF(0));
err0:
  return error;
}

unknown's avatar
unknown committed
218
/* adds a document to the collection */
unknown's avatar
unknown committed
219 220
int _mi_ft_add(MI_INFO *info, uint keynr, byte *keybuf, const byte *record,
	       my_off_t pos)
unknown's avatar
unknown committed
221
{
unknown's avatar
unknown committed
222
  int error= -1;
unknown's avatar
unknown committed
223 224
  FT_WORD *wlist;

unknown's avatar
unknown committed
225 226 227 228 229 230
  if ((wlist=_mi_ft_parserecord(info, keynr, keybuf, record)))
  {
    error=_mi_ft_store(info,keynr,keybuf,wlist,pos);
    my_free((char*) wlist,MYF(0));
  }
  return error;
unknown's avatar
unknown committed
231 232 233
}

/* removes a document from the collection */
unknown's avatar
unknown committed
234 235
int _mi_ft_del(MI_INFO *info, uint keynr, byte *keybuf, const byte *record,
	       my_off_t pos)
unknown's avatar
unknown committed
236
{
unknown's avatar
unknown committed
237
  int error= -1;
unknown's avatar
unknown committed
238
  FT_WORD *wlist;
unknown's avatar
unknown committed
239 240 241 242 243 244
  if ((wlist=_mi_ft_parserecord(info, keynr, keybuf, record)))
  {
    error=_mi_ft_erase(info,keynr,keybuf,wlist,pos);
    my_free((char*) wlist,MYF(0));
  }
  return error;
unknown's avatar
unknown committed
245 246
}

unknown's avatar
unknown committed
247 248
/*
  Build the key for one word of a document.

  A temporary image is assembled in a local buffer and handed to
  _mi_make_key(), which produces the key in 'keybuf'.  The image is:
    - the word weight, stored with mi_float4store(); 0 is stored instead
      when filepos is HA_OFFSET_ERROR
    - (EVAL_RUN builds only) one byte holding wptr->cnt
    - the word length, stored with int2store()
    - the bytes of the word

  RETURN
    the value returned by _mi_make_key()
*/
uint _ft_make_key(MI_INFO *info, uint keynr, byte *keybuf, FT_WORD *wptr,
                  my_off_t filepos)
{
  byte buf[HA_FT_MAXLEN+16];
  byte *tail= buf + HA_FT_WLEN;                 /* first byte after weight */

#if HA_FT_WTYPE == HA_KEYTYPE_FLOAT
  float weight;

  if (filepos == HA_OFFSET_ERROR)
    weight= 0;
  else
    weight= (float) wptr->weight;
  mi_float4store(buf, weight);
#else
#error
#endif

#ifdef EVAL_RUN
  *tail++= wptr->cnt;
#endif /* EVAL_RUN */
  int2store(tail, wptr->len);
  memcpy(tail + 2, wptr->pos, wptr->len);
  return _mi_make_key(info, keynr, (uchar*) keybuf, buf, filepos);
}