Commit aa98f58b authored by unknown's avatar unknown

query expansion for fulltext search


myisam/ft_boolean_search.c:
  assert.h and queues.h moved to ftdefs.h
myisam/ft_parser.c:
  ft_parse() with alloc
myisam/ft_static.c:
  api changes, ft_max_word_len_for_sort variable removed
myisam/ft_update.c:
  ft_parse() with alloc
myisam/ftdefs.h:
  variable ft_max_word_len_for_sort -> define FT_MAX_WORD_LEN_FOR_SORT
  api changes, ft_max_word_len_for_sort variable removed
  ft_parse() with alloc
myisam/mi_check.c:
  variable ft_max_word_len_for_sort -> define FT_MAX_WORD_LEN_FOR_SORT
myisam/myisamchk.c:
  ft_max_word_len_for_sort removed
mysql-test/r/fulltext.result:
  query expansion tests
mysql-test/r/fulltext_var.result:
  ft_max_word_len_for_sort removed
mysql-test/t/fulltext.test:
  query expansion tests
sql/ha_myisam.h:
  ft api changes for query expansion
sql/mysqld.cc:
  ft_max_word_len_for_sort removed
  ft_query_expansion_limit added
sql/set_var.cc:
  ft_max_word_len_for_sort removed
  ft_query_expansion_limit added
sql/sql_yacc.yy:
  EXPANSION added to keyword: rule
parent fd85cc84
...@@ -51,18 +51,18 @@ extern const char *ft_precompiled_stopwords[]; ...@@ -51,18 +51,18 @@ extern const char *ft_precompiled_stopwords[];
extern ulong ft_min_word_len; extern ulong ft_min_word_len;
extern ulong ft_max_word_len; extern ulong ft_max_word_len;
extern ulong ft_max_word_len_for_sort; extern ulong ft_query_expansion_limit;
extern const char *ft_boolean_syntax; extern const char *ft_boolean_syntax;
int ft_init_stopwords(void); int ft_init_stopwords(void);
void ft_free_stopwords(void); void ft_free_stopwords(void);
#define FT_NL 0 /* this MUST be 0, see ft_init_search() */ #define FT_NL 0
#define FT_BOOL 1 /* this MUST be 1, see ft_init_search() */ #define FT_BOOL 1
#define FT_SORTED 2 #define FT_SORTED 2
#define FT_EXPAND 4 /* query expansion */ #define FT_EXPAND 4 /* query expansion */
FT_INFO *ft_init_search(uint,void *, uint, byte *, uint); FT_INFO *ft_init_search(uint,void *, uint, byte *, uint, byte *);
#ifdef __cplusplus #ifdef __cplusplus
} }
......
...@@ -20,8 +20,6 @@ ...@@ -20,8 +20,6 @@
#define FT_CORE #define FT_CORE
#include "ftdefs.h" #include "ftdefs.h"
#include <queues.h>
#include <assert.h> /* for DBUG_ASSERT() */
/* search with boolean queries */ /* search with boolean queries */
...@@ -340,8 +338,7 @@ static void _ftb_init_index_search(FT_INFO *ftb) ...@@ -340,8 +338,7 @@ static void _ftb_init_index_search(FT_INFO *ftb)
FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query, FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query,
uint query_len, uint query_len)
uint flags __attribute__((unused)))
{ {
FTB *ftb; FTB *ftb;
FTB_EXPR *ftbe; FTB_EXPR *ftbe;
......
...@@ -167,17 +167,28 @@ static int walk_and_copy(FT_SUPERDOC *from, ...@@ -167,17 +167,28 @@ static int walk_and_copy(FT_SUPERDOC *from,
DBUG_RETURN(0); DBUG_RETURN(0);
} }
/*
  tree_walk() action used for query expansion: finalizes the weight of one
  matched document and pushes it onto the queue of best matches.

  from   document node being visited; its doc.weight is updated in place
  count  unused
  best   queue that collects the candidate documents

  Always returns 0.
*/
static int walk_and_push(FT_SUPERDOC *from,
                         uint32 count __attribute__((unused)), QUEUE *best)
{
  /* trace tag previously read "walk_and_copy" (copy-paste slip) */
  DBUG_ENTER("walk_and_push");
  from->doc.weight+=from->tmp_weight*from->word_ptr->weight;
  /*
    Presumably clamps the element count to ft_query_expansion_limit-1 so the
    insert below cannot grow the queue past the limit - TODO confirm against
    the set_if_smaller() definition.
  */
  set_if_smaller(best->elements, ft_query_expansion_limit-1);
  queue_insert(best, (byte *)& from->doc);
  DBUG_RETURN(0);
}
static int FT_DOC_cmp(FT_DOC *a, FT_DOC *b)
static int FT_DOC_cmp(void *unused __attribute__((unused)),
FT_DOC *a, FT_DOC *b)
{ {
return sgn(b->weight - a->weight); return sgn(b->weight - a->weight);
} }
FT_INFO *ft_init_nlq_search(MI_INFO *info, uint keynr, byte *query, FT_INFO *ft_init_nlq_search(MI_INFO *info, uint keynr, byte *query,
uint query_len, uint flags) uint query_len, uint flags, byte *record)
{ {
TREE allocated_wtree, *wtree=&allocated_wtree; TREE wtree;
ALL_IN_ONE aio; ALL_IN_ONE aio;
FT_DOC *dptr; FT_DOC *dptr;
FT_INFO *dlist=NULL; FT_INFO *dlist=NULL;
...@@ -196,24 +207,47 @@ FT_INFO *ft_init_nlq_search(MI_INFO *info, uint keynr, byte *query, ...@@ -196,24 +207,47 @@ FT_INFO *ft_init_nlq_search(MI_INFO *info, uint keynr, byte *query,
aio.charset=info->s->keyinfo[keynr].seg->charset; aio.charset=info->s->keyinfo[keynr].seg->charset;
aio.keybuff=info->lastkey+info->s->base.max_key_length; aio.keybuff=info->lastkey+info->s->base.max_key_length;
bzero(&allocated_wtree,sizeof(allocated_wtree)); bzero(&wtree,sizeof(wtree));
init_tree(&aio.dtree,0,0,sizeof(FT_SUPERDOC),(qsort_cmp2)&FT_SUPERDOC_cmp,0, init_tree(&aio.dtree,0,0,sizeof(FT_SUPERDOC),(qsort_cmp2)&FT_SUPERDOC_cmp,0,
NULL, NULL); NULL, NULL);
ft_parse_init(&allocated_wtree, aio.charset); ft_parse_init(&wtree, aio.charset);
if (ft_parse(&allocated_wtree,query,query_len)) if (ft_parse(&wtree,query,query_len,0))
goto err; goto err;
if (tree_walk(wtree, (tree_walk_action)&walk_and_match, &aio, if (tree_walk(&wtree, (tree_walk_action)&walk_and_match, &aio,
left_root_right)) left_root_right))
goto err2; goto err;
/*
  Query expansion: collect the best documents of the first pass, add the
  words of each of them to the query word tree, then redo the match with
  the enlarged tree.
*/
if (flags & FT_EXPAND && ft_query_expansion_limit)
{
  QUEUE best;
  /* NOTE(review): the result of init_queue() is not checked - confirm whether it can fail */
  init_queue(&best,ft_query_expansion_limit,0,0, &FT_DOC_cmp, 0);
  tree_walk(&aio.dtree, (tree_walk_action) &walk_and_push,
            &best, left_root_right);
  while (best.elements)
  {
    my_off_t docid=((FT_DOC *)queue_remove(& best, 0))->dpos;
    if (!(*info->read_record)(info,docid,record))
    {
      info->update|= HA_STATE_AKTIV;
      /*
        with_alloc=1: words are copied into the tree's own memory, because
        the record buffer is reused by the next read_record() call.
        _mi_ft_parse() returns non-zero on failure; do not continue with a
        half-built word tree - release the queue and bail out.
      */
      if (_mi_ft_parse(&wtree, info, keynr, record,1))
      {
        delete_queue(&best);
        goto err;
      }
    }
  }
  delete_queue(&best);
  /* drop the first-pass results before matching the expanded word set */
  reset_tree(&aio.dtree);
  if (tree_walk(&wtree, (tree_walk_action)&walk_and_match, &aio,
                left_root_right))
    goto err;
}
dlist=(FT_INFO *)my_malloc(sizeof(FT_INFO)+ dlist=(FT_INFO *)my_malloc(sizeof(FT_INFO)+
sizeof(FT_DOC)*(aio.dtree.elements_in_tree-1), sizeof(FT_DOC)*(aio.dtree.elements_in_tree-1),
MYF(0)); MYF(0));
if(!dlist) if (!dlist)
goto err2; goto err;
dlist->please= (struct _ft_vft *) & _ft_vft_nlq; dlist->please= (struct _ft_vft *) & _ft_vft_nlq;
dlist->ndocs=aio.dtree.elements_in_tree; dlist->ndocs=aio.dtree.elements_in_tree;
...@@ -225,13 +259,11 @@ FT_INFO *ft_init_nlq_search(MI_INFO *info, uint keynr, byte *query, ...@@ -225,13 +259,11 @@ FT_INFO *ft_init_nlq_search(MI_INFO *info, uint keynr, byte *query,
&dptr, left_root_right); &dptr, left_root_right);
if (flags & FT_SORTED) if (flags & FT_SORTED)
qsort(dlist->doc, dlist->ndocs, sizeof(FT_DOC), (qsort_cmp)&FT_DOC_cmp); qsort2(dlist->doc, dlist->ndocs, sizeof(FT_DOC), (qsort2_cmp)&FT_DOC_cmp, 0);
err2:
delete_tree(wtree);
delete_tree(&aio.dtree);
err: err:
delete_tree(&aio.dtree);
delete_tree(&wtree);
info->lastpos=saved_lastpos; info->lastpos=saved_lastpos;
DBUG_RETURN(dlist); DBUG_RETURN(dlist);
} }
......
...@@ -183,7 +183,7 @@ void ft_parse_init(TREE *wtree, CHARSET_INFO *cs) ...@@ -183,7 +183,7 @@ void ft_parse_init(TREE *wtree, CHARSET_INFO *cs)
DBUG_VOID_RETURN; DBUG_VOID_RETURN;
} }
int ft_parse(TREE *wtree, byte *doc, int doclen) int ft_parse(TREE *wtree, byte *doc, int doclen, my_bool with_alloc)
{ {
byte *end=doc+doclen; byte *end=doc+doclen;
FT_WORD w; FT_WORD w;
...@@ -191,6 +191,15 @@ int ft_parse(TREE *wtree, byte *doc, int doclen) ...@@ -191,6 +191,15 @@ int ft_parse(TREE *wtree, byte *doc, int doclen)
while (ft_simple_get_word(wtree->custom_arg, &doc,end,&w)) while (ft_simple_get_word(wtree->custom_arg, &doc,end,&w))
{ {
if (with_alloc)
{
  byte *ptr;
  /* allocating the data in the tree - to avoid mallocs and frees */
  DBUG_ASSERT(wtree->with_delete==0);
  ptr=(byte *)alloc_root(& wtree->mem_root,w.len);
  /* an allocation failure must not reach memcpy(); use the common error path */
  if (!ptr)
    goto err;
  memcpy(ptr, w.pos, w.len);
  /* from here on the word points at the tree-owned copy, not at the caller's buffer */
  w.pos=ptr;
}
if (!tree_insert(wtree, &w, 0, wtree->custom_arg)) if (!tree_insert(wtree, &w, 0, wtree->custom_arg))
goto err; goto err;
} }
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
ulong ft_min_word_len=4; ulong ft_min_word_len=4;
ulong ft_max_word_len=HA_FT_MAXLEN; ulong ft_max_word_len=HA_FT_MAXLEN;
ulong ft_max_word_len_for_sort=20; ulong ft_query_expansion_limit=5;
const char *ft_boolean_syntax="+ -><()~*:\"\"&|"; const char *ft_boolean_syntax="+ -><()~*:\"\"&|";
const HA_KEYSEG ft_keysegs[FT_SEGS]={ const HA_KEYSEG ft_keysegs[FT_SEGS]={
...@@ -53,14 +53,13 @@ const struct _ft_vft _ft_vft_boolean = { ...@@ -53,14 +53,13 @@ const struct _ft_vft _ft_vft_boolean = {
ft_boolean_get_relevance, ft_boolean_reinit_search ft_boolean_get_relevance, ft_boolean_reinit_search
}; };
FT_INFO *(*_ft_init_vft[2])(MI_INFO *, uint, byte *, uint, uint) =
{ ft_init_nlq_search, ft_init_boolean_search };
/* dispatches on FT_BOOL; each branch must return the FT_INFO built by the chosen initializer */
FT_INFO *ft_init_search(uint flags, void *info, uint keynr, FT_INFO *ft_init_search(uint flags, void *info, uint keynr,
byte *query, uint query_len) byte *query, uint query_len, byte *record)
{ {
return (*_ft_init_vft[ flags&1 ])((MI_INFO *)info, keynr, if (flags & FT_BOOL)
query, query_len, flags); return ft_init_boolean_search((MI_INFO *)info, keynr, query, query_len);
else
return ft_init_nlq_search((MI_INFO *)info, keynr, query, query_len, flags, record);
} }
const char *ft_stopword_file = 0; const char *ft_stopword_file = 0;
......
...@@ -97,7 +97,8 @@ uint _mi_ft_segiterator(register FT_SEG_ITERATOR *ftsi) ...@@ -97,7 +97,8 @@ uint _mi_ft_segiterator(register FT_SEG_ITERATOR *ftsi)
/* parses a document i.e. calls ft_parse for every keyseg */ /* parses a document i.e. calls ft_parse for every keyseg */
uint _mi_ft_parse(TREE *parsed, MI_INFO *info, uint keynr, const byte *record) uint _mi_ft_parse(TREE *parsed, MI_INFO *info, uint keynr,
const byte *record, my_bool with_alloc)
{ {
FT_SEG_ITERATOR ftsi; FT_SEG_ITERATOR ftsi;
DBUG_ENTER("_mi_ft_parse"); DBUG_ENTER("_mi_ft_parse");
...@@ -108,7 +109,7 @@ uint _mi_ft_parse(TREE *parsed, MI_INFO *info, uint keynr, const byte *record) ...@@ -108,7 +109,7 @@ uint _mi_ft_parse(TREE *parsed, MI_INFO *info, uint keynr, const byte *record)
while (_mi_ft_segiterator(&ftsi)) while (_mi_ft_segiterator(&ftsi))
{ {
if (ftsi.pos) if (ftsi.pos)
if (ft_parse(parsed, (byte *)ftsi.pos, ftsi.len)) if (ft_parse(parsed, (byte *)ftsi.pos, ftsi.len, with_alloc))
DBUG_RETURN(1); DBUG_RETURN(1);
} }
DBUG_RETURN(0); DBUG_RETURN(0);
...@@ -120,7 +121,7 @@ FT_WORD * _mi_ft_parserecord(MI_INFO *info, uint keynr, const byte *record) ...@@ -120,7 +121,7 @@ FT_WORD * _mi_ft_parserecord(MI_INFO *info, uint keynr, const byte *record)
DBUG_ENTER("_mi_ft_parserecord"); DBUG_ENTER("_mi_ft_parserecord");
bzero((char*) &ptree, sizeof(ptree)); bzero((char*) &ptree, sizeof(ptree));
if (_mi_ft_parse(&ptree, info, keynr, record)) if (_mi_ft_parse(&ptree, info, keynr, record,0))
DBUG_RETURN(NULL); DBUG_RETURN(NULL);
DBUG_RETURN(ft_linearize(&ptree)); DBUG_RETURN(ft_linearize(&ptree));
......
...@@ -21,11 +21,15 @@ ...@@ -21,11 +21,15 @@
#include "fulltext.h" #include "fulltext.h"
#include <m_ctype.h> #include <m_ctype.h>
#include <my_tree.h> #include <my_tree.h>
#include <queues.h>
#include <assert.h>
#define true_word_char(s,X) (my_isalnum(s,X) || (X)=='_') #define true_word_char(s,X) (my_isalnum(s,X) || (X)=='_')
#define misc_word_char(X) ((X)=='\'') #define misc_word_char(X) ((X)=='\'')
#define word_char(s,X) (true_word_char(s,X) || misc_word_char(X)) #define word_char(s,X) (true_word_char(s,X) || misc_word_char(X))
#define FT_MAX_WORD_LEN_FOR_SORT 20
#define COMPILE_STOPWORDS_IN #define COMPILE_STOPWORDS_IN
/* Interested readers may consult SMART /* Interested readers may consult SMART
...@@ -122,13 +126,15 @@ void _mi_ft_segiterator_dummy_init(const byte *, uint, FT_SEG_ITERATOR *); ...@@ -122,13 +126,15 @@ void _mi_ft_segiterator_dummy_init(const byte *, uint, FT_SEG_ITERATOR *);
uint _mi_ft_segiterator(FT_SEG_ITERATOR *); uint _mi_ft_segiterator(FT_SEG_ITERATOR *);
void ft_parse_init(TREE *, CHARSET_INFO *); void ft_parse_init(TREE *, CHARSET_INFO *);
int ft_parse(TREE *, byte *, int); int ft_parse(TREE *, byte *, int, my_bool);
FT_WORD * ft_linearize(TREE *); FT_WORD * ft_linearize(TREE *);
FT_WORD * _mi_ft_parserecord(MI_INFO *, uint, const byte *); FT_WORD * _mi_ft_parserecord(MI_INFO *, uint, const byte *);
uint _mi_ft_parse(TREE *parsed, MI_INFO *info, uint keynr, const byte *record); uint _mi_ft_parse(TREE *, MI_INFO *, uint, const byte *, my_bool);
FT_INFO *ft_init_nlq_search(MI_INFO *, uint, byte *, uint, uint, byte *);
FT_INFO *ft_init_boolean_search(MI_INFO *, uint, byte *, uint);
extern const struct _ft_vft _ft_vft_nlq; extern const struct _ft_vft _ft_vft_nlq;
FT_INFO *ft_init_nlq_search(MI_INFO *, uint, byte *, uint, uint);
int ft_nlq_read_next(FT_INFO *, char *); int ft_nlq_read_next(FT_INFO *, char *);
float ft_nlq_find_relevance(FT_INFO *, byte *, uint); float ft_nlq_find_relevance(FT_INFO *, byte *, uint);
void ft_nlq_close_search(FT_INFO *); void ft_nlq_close_search(FT_INFO *);
...@@ -137,10 +143,10 @@ my_off_t ft_nlq_get_docid(FT_INFO *); ...@@ -137,10 +143,10 @@ my_off_t ft_nlq_get_docid(FT_INFO *);
void ft_nlq_reinit_search(FT_INFO *); void ft_nlq_reinit_search(FT_INFO *);
extern const struct _ft_vft _ft_vft_boolean; extern const struct _ft_vft _ft_vft_boolean;
FT_INFO *ft_init_boolean_search(MI_INFO *, uint, byte *, uint, uint);
int ft_boolean_read_next(FT_INFO *, char *); int ft_boolean_read_next(FT_INFO *, char *);
float ft_boolean_find_relevance(FT_INFO *, byte *, uint); float ft_boolean_find_relevance(FT_INFO *, byte *, uint);
void ft_boolean_close_search(FT_INFO *); void ft_boolean_close_search(FT_INFO *);
float ft_boolean_get_relevance(FT_INFO *); float ft_boolean_get_relevance(FT_INFO *);
my_off_t ft_boolean_get_docid(FT_INFO *); my_off_t ft_boolean_get_docid(FT_INFO *);
void ft_boolean_reinit_search(FT_INFO *); void ft_boolean_reinit_search(FT_INFO *);
...@@ -20,7 +20,6 @@ ...@@ -20,7 +20,6 @@
#include <m_ctype.h> #include <m_ctype.h>
#include <stdarg.h> #include <stdarg.h>
#include <my_getopt.h> #include <my_getopt.h>
#include <assert.h>
#ifdef HAVE_SYS_VADVISE_H #ifdef HAVE_SYS_VADVISE_H
#include <sys/vadvise.h> #include <sys/vadvise.h>
#endif #endif
...@@ -1955,11 +1954,11 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, ...@@ -1955,11 +1954,11 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info,
if (sort_param.keyinfo->flag & HA_FULLTEXT) if (sort_param.keyinfo->flag & HA_FULLTEXT)
{ {
sort_info.max_records= sort_info.max_records=
(ha_rows) (sort_info.filelength/ft_max_word_len_for_sort+1); (ha_rows) (sort_info.filelength/FT_MAX_WORD_LEN_FOR_SORT+1);
sort_param.key_read=sort_ft_key_read; sort_param.key_read=sort_ft_key_read;
sort_param.key_write=sort_ft_key_write; sort_param.key_write=sort_ft_key_write;
sort_param.key_length+=ft_max_word_len_for_sort-HA_FT_MAXLEN; sort_param.key_length+=FT_MAX_WORD_LEN_FOR_SORT-HA_FT_MAXLEN;
} }
else else
{ {
...@@ -2350,7 +2349,7 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, ...@@ -2350,7 +2349,7 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info,
total_key_length+=sort_param[i].key_length; total_key_length+=sort_param[i].key_length;
if (sort_param[i].keyinfo->flag & HA_FULLTEXT) if (sort_param[i].keyinfo->flag & HA_FULLTEXT)
sort_param[i].key_length+=ft_max_word_len_for_sort-ft_max_word_len; sort_param[i].key_length+=FT_MAX_WORD_LEN_FOR_SORT-ft_max_word_len;
} }
sort_info.total_keys=i; sort_info.total_keys=i;
sort_param[0].master= 1; sort_param[0].master= 1;
...@@ -3875,7 +3874,7 @@ static my_bool mi_too_big_key_for_sort(MI_KEYDEF *key, ha_rows rows) ...@@ -3875,7 +3874,7 @@ static my_bool mi_too_big_key_for_sort(MI_KEYDEF *key, ha_rows rows)
{ {
uint key_maxlength=key->maxlength; uint key_maxlength=key->maxlength;
if (key->flag & HA_FULLTEXT) if (key->flag & HA_FULLTEXT)
key_maxlength+=ft_max_word_len_for_sort-HA_FT_MAXLEN; key_maxlength+=FT_MAX_WORD_LEN_FOR_SORT-HA_FT_MAXLEN;
return (key->flag & (HA_BINARY_PACK_KEY | HA_VAR_LENGTH_KEY | HA_FULLTEXT) && return (key->flag & (HA_BINARY_PACK_KEY | HA_VAR_LENGTH_KEY | HA_FULLTEXT) &&
((ulonglong) rows * key_maxlength > ((ulonglong) rows * key_maxlength >
(ulonglong) myisam_max_temp_length)); (ulonglong) myisam_max_temp_length));
...@@ -3891,7 +3890,7 @@ void mi_disable_non_unique_index(MI_INFO *info, ha_rows rows) ...@@ -3891,7 +3890,7 @@ void mi_disable_non_unique_index(MI_INFO *info, ha_rows rows)
MI_KEYDEF *key=share->keyinfo; MI_KEYDEF *key=share->keyinfo;
for (i=0 ; i < share->base.keys ; i++,key++) for (i=0 ; i < share->base.keys ; i++,key++)
{ {
if (!(key->flag & (HA_NOSAME | HA_SPATIAL | HA_AUTO_KEY)) && if (!(key->flag & (HA_NOSAME | HA_SPATIAL | HA_AUTO_KEY)) &&
! mi_too_big_key_for_sort(key,rows) && info->s->base.auto_key != i+1) ! mi_too_big_key_for_sort(key,rows) && info->s->base.auto_key != i+1)
{ {
share->state.key_map&= ~ ((ulonglong) 1 << i); share->state.key_map&= ~ ((ulonglong) 1 << i);
......
...@@ -314,9 +314,6 @@ static struct my_option my_long_options[] = ...@@ -314,9 +314,6 @@ static struct my_option my_long_options[] =
{ "ft_max_word_len", OPT_FT_MAX_WORD_LEN, "", (gptr*) &ft_max_word_len, { "ft_max_word_len", OPT_FT_MAX_WORD_LEN, "", (gptr*) &ft_max_word_len,
(gptr*) &ft_max_word_len, 0, GET_ULONG, REQUIRED_ARG, HA_FT_MAXLEN, 10, (gptr*) &ft_max_word_len, 0, GET_ULONG, REQUIRED_ARG, HA_FT_MAXLEN, 10,
HA_FT_MAXLEN, 0, 1, 0}, HA_FT_MAXLEN, 0, 1, 0},
{ "ft_max_word_len_for_sort", OPT_FT_MAX_WORD_LEN_FOR_SORT, "",
(gptr*) &ft_max_word_len_for_sort, (gptr*) &ft_max_word_len_for_sort, 0,
GET_ULONG, REQUIRED_ARG, 20, 4, HA_FT_MAXLEN, 0, 1, 0},
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
}; };
......
...@@ -18,11 +18,20 @@ Full-text indexes are called collections ...@@ -18,11 +18,20 @@ Full-text indexes are called collections
Only MyISAM tables support collections Only MyISAM tables support collections
select * from t1 where MATCH(a,b) AGAINST ("only"); select * from t1 where MATCH(a,b) AGAINST ("only");
a b a b
select * from t1 where MATCH(a,b) AGAINST ("collections") UNION ALL select * from t1 where MATCH(a,b) AGAINST ("indexes"); select * from t1 where MATCH(a,b) AGAINST ("collections" WITH QUERY EXPANSION);
a b a b
Only MyISAM tables support collections Only MyISAM tables support collections
Full-text indexes are called collections Full-text indexes are called collections
MySQL has now support for full-text search
select * from t1 where MATCH(a,b) AGAINST ("indexes" WITH QUERY EXPANSION);
a b
Full-text indexes are called collections
Only MyISAM tables support collections
select * from t1 where MATCH(a,b) AGAINST ("indexes collections" WITH QUERY EXPANSION);
a b
Full-text indexes are called collections Full-text indexes are called collections
Only MyISAM tables support collections
MySQL has now support for full-text search
explain select * from t1 where MATCH(a,b) AGAINST ("collections"); explain select * from t1 where MATCH(a,b) AGAINST ("collections");
id select_type table type possible_keys key key_len ref rows Extra id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 fulltext a a 0 1 Using where 1 SIMPLE t1 fulltext a a 0 1 Using where
......
...@@ -3,5 +3,5 @@ Variable_name Value ...@@ -3,5 +3,5 @@ Variable_name Value
ft_boolean_syntax + -><()~*:""&| ft_boolean_syntax + -><()~*:""&|
ft_min_word_len 4 ft_min_word_len 4
ft_max_word_len 254 ft_max_word_len 254
ft_max_word_len_for_sort 20 ft_query_expansion_limit 20
ft_stopword_file (built-in) ft_stopword_file (built-in)
...@@ -20,9 +20,11 @@ select * from t1 where MATCH(a,b) AGAINST ("indexes"); ...@@ -20,9 +20,11 @@ select * from t1 where MATCH(a,b) AGAINST ("indexes");
select * from t1 where MATCH(a,b) AGAINST ("indexes collections"); select * from t1 where MATCH(a,b) AGAINST ("indexes collections");
select * from t1 where MATCH(a,b) AGAINST ("only"); select * from t1 where MATCH(a,b) AGAINST ("only");
# UNION of fulltext's # query expansion
select * from t1 where MATCH(a,b) AGAINST ("collections") UNION ALL select * from t1 where MATCH(a,b) AGAINST ("indexes");
select * from t1 where MATCH(a,b) AGAINST ("collections" WITH QUERY EXPANSION);
select * from t1 where MATCH(a,b) AGAINST ("indexes" WITH QUERY EXPANSION);
select * from t1 where MATCH(a,b) AGAINST ("indexes collections" WITH QUERY EXPANSION);
# add_ft_keys() tests # add_ft_keys() tests
...@@ -66,7 +68,6 @@ select * from t1 where MATCH a AGAINST ("search" IN BOOLEAN MODE); ...@@ -66,7 +68,6 @@ select * from t1 where MATCH a AGAINST ("search" IN BOOLEAN MODE);
select * from t1 where MATCH b AGAINST ("sear*" IN BOOLEAN MODE); select * from t1 where MATCH b AGAINST ("sear*" IN BOOLEAN MODE);
# UNION of fulltext's # UNION of fulltext's
select * from t1 where MATCH(a,b) AGAINST ("collections") UNION ALL select * from t1 where MATCH(a,b) AGAINST ("indexes"); select * from t1 where MATCH(a,b) AGAINST ("collections") UNION ALL select * from t1 where MATCH(a,b) AGAINST ("indexes");
#update/delete with fulltext index #update/delete with fulltext index
......
...@@ -90,7 +90,7 @@ class ha_myisam: public handler ...@@ -90,7 +90,7 @@ class ha_myisam: public handler
return 0; return 0;
} }
FT_INFO *ft_init_ext(uint flags, uint inx,const byte *key, uint keylen) FT_INFO *ft_init_ext(uint flags, uint inx,const byte *key, uint keylen)
{ return ft_init_search(flags,file,inx,(byte*) key,keylen); } { return ft_init_search(flags,file,inx,(byte*) key,keylen, table->record[0]); }
int ft_read(byte *buf); int ft_read(byte *buf);
int rnd_init(bool scan=1); int rnd_init(bool scan=1);
int rnd_next(byte *buf); int rnd_next(byte *buf);
......
...@@ -3568,7 +3568,7 @@ enum options_mysqld ...@@ -3568,7 +3568,7 @@ enum options_mysqld
OPT_CONNECT_TIMEOUT, OPT_DELAYED_INSERT_TIMEOUT, OPT_CONNECT_TIMEOUT, OPT_DELAYED_INSERT_TIMEOUT,
OPT_DELAYED_INSERT_LIMIT, OPT_DELAYED_QUEUE_SIZE, OPT_DELAYED_INSERT_LIMIT, OPT_DELAYED_QUEUE_SIZE,
OPT_FLUSH_TIME, OPT_FT_MIN_WORD_LEN, OPT_FLUSH_TIME, OPT_FT_MIN_WORD_LEN,
OPT_FT_MAX_WORD_LEN, OPT_FT_MAX_WORD_LEN_FOR_SORT, OPT_FT_STOPWORD_FILE, OPT_FT_MAX_WORD_LEN, OPT_FT_QUERY_EXPANSION_LIMIT, OPT_FT_STOPWORD_FILE,
OPT_INTERACTIVE_TIMEOUT, OPT_JOIN_BUFF_SIZE, OPT_INTERACTIVE_TIMEOUT, OPT_JOIN_BUFF_SIZE,
OPT_KEY_BUFFER_SIZE, OPT_LONG_QUERY_TIME, OPT_KEY_BUFFER_SIZE, OPT_LONG_QUERY_TIME,
OPT_LOWER_CASE_TABLE_NAMES, OPT_MAX_ALLOWED_PACKET, OPT_LOWER_CASE_TABLE_NAMES, OPT_MAX_ALLOWED_PACKET,
...@@ -4210,10 +4210,10 @@ replicating a LOAD DATA INFILE command.", ...@@ -4210,10 +4210,10 @@ replicating a LOAD DATA INFILE command.",
"The maximum length of the word to be included in a FULLTEXT index. Note: FULLTEXT indexes must be rebuilt after changing this variable.", "The maximum length of the word to be included in a FULLTEXT index. Note: FULLTEXT indexes must be rebuilt after changing this variable.",
(gptr*) &ft_max_word_len, (gptr*) &ft_max_word_len, 0, GET_ULONG, (gptr*) &ft_max_word_len, (gptr*) &ft_max_word_len, 0, GET_ULONG,
REQUIRED_ARG, HA_FT_MAXLEN, 10, HA_FT_MAXLEN, 0, 1, 0}, REQUIRED_ARG, HA_FT_MAXLEN, 10, HA_FT_MAXLEN, 0, 1, 0},
{ "ft_max_word_len_for_sort", OPT_FT_MAX_WORD_LEN_FOR_SORT, { "ft_query_expansion_limit", OPT_FT_QUERY_EXPANSION_LIMIT,
"The maximum length of the word for repair_by_sorting. Longer words are included the slow way. The lower this value, the more words will be put in one sort bucket.", "Number of best matches to use for query expansion",
(gptr*) &ft_max_word_len_for_sort, (gptr*) &ft_max_word_len_for_sort, 0, GET_ULONG, (gptr*) &ft_query_expansion_limit, (gptr*) &ft_query_expansion_limit, 0, GET_ULONG,
REQUIRED_ARG, 20, 4, HA_FT_MAXLEN, 0, 1, 0}, REQUIRED_ARG, 20, 0, 1000, 0, 1, 0},
{ "ft_stopword_file", OPT_FT_STOPWORD_FILE, { "ft_stopword_file", OPT_FT_STOPWORD_FILE,
"Use stopwords from this file instead of built-in list.", "Use stopwords from this file instead of built-in list.",
(gptr*) &ft_stopword_file, (gptr*) &ft_stopword_file, 0, GET_STR, (gptr*) &ft_stopword_file, (gptr*) &ft_stopword_file, 0, GET_STR,
......
...@@ -556,15 +556,15 @@ struct show_var_st init_vars[]= { ...@@ -556,15 +556,15 @@ struct show_var_st init_vars[]= {
{"ft_boolean_syntax", (char*) ft_boolean_syntax, SHOW_CHAR}, {"ft_boolean_syntax", (char*) ft_boolean_syntax, SHOW_CHAR},
{"ft_min_word_len", (char*) &ft_min_word_len, SHOW_LONG}, {"ft_min_word_len", (char*) &ft_min_word_len, SHOW_LONG},
{"ft_max_word_len", (char*) &ft_max_word_len, SHOW_LONG}, {"ft_max_word_len", (char*) &ft_max_word_len, SHOW_LONG},
{"ft_max_word_len_for_sort",(char*) &ft_max_word_len_for_sort, SHOW_LONG}, {"ft_query_expansion_limit",(char*) &ft_query_expansion_limit, SHOW_LONG},
{"ft_stopword_file", (char*) &ft_stopword_file, SHOW_CHAR_PTR}, {"ft_stopword_file", (char*) &ft_stopword_file, SHOW_CHAR_PTR},
{"have_bdb", (char*) &have_berkeley_db, SHOW_HAVE}, {"have_bdb", (char*) &have_berkeley_db, SHOW_HAVE},
{"have_crypt", (char*) &have_crypt, SHOW_HAVE}, {"have_crypt", (char*) &have_crypt, SHOW_HAVE},
{"have_compress", (char*) &have_compress, SHOW_HAVE}, {"have_compress", (char*) &have_compress, SHOW_HAVE},
{"have_innodb", (char*) &have_innodb, SHOW_HAVE}, {"have_innodb", (char*) &have_innodb, SHOW_HAVE},
{"have_isam", (char*) &have_isam, SHOW_HAVE}, {"have_isam", (char*) &have_isam, SHOW_HAVE},
{"have_raid", (char*) &have_raid, SHOW_HAVE}, {"have_raid", (char*) &have_raid, SHOW_HAVE},
{"have_symlink", (char*) &have_symlink, SHOW_HAVE}, {"have_symlink", (char*) &have_symlink, SHOW_HAVE},
{"have_openssl", (char*) &have_openssl, SHOW_HAVE}, {"have_openssl", (char*) &have_openssl, SHOW_HAVE},
{"have_query_cache", (char*) &have_query_cache, SHOW_HAVE}, {"have_query_cache", (char*) &have_query_cache, SHOW_HAVE},
{"init_file", (char*) &opt_init_file, SHOW_CHAR_PTR}, {"init_file", (char*) &opt_init_file, SHOW_CHAR_PTR},
......
...@@ -4546,6 +4546,7 @@ keyword: ...@@ -4546,6 +4546,7 @@ keyword:
| ESCAPE_SYM {} | ESCAPE_SYM {}
| EVENTS_SYM {} | EVENTS_SYM {}
| EXECUTE_SYM {} | EXECUTE_SYM {}
| EXPANSION_SYM {}
| EXTENDED_SYM {} | EXTENDED_SYM {}
| FAST_SYM {} | FAST_SYM {}
| DISABLE_SYM {} | DISABLE_SYM {}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment