Commit fc09f13c authored by unknown

--ft_stopword_file command-line option

parent 30a35bcf
......@@ -46,6 +46,7 @@ struct st_ft_info
};
#endif
extern const char *ft_stopword_file;
extern const char *ft_precompiled_stopwords[];
extern ulong ft_min_word_len;
......@@ -53,7 +54,7 @@ extern ulong ft_max_word_len;
extern ulong ft_max_word_len_for_sort;
extern const char *ft_boolean_syntax;
int ft_init_stopwords(const char **);
int ft_init_stopwords(void);
void ft_free_stopwords(void);
#define FT_NL 0
......
......@@ -67,6 +67,7 @@ FT_INFO *ft_init_search(uint mode, void *info, uint keynr,
query, query_len, presort);
}
const char *ft_stopword_file = 0;
const char *ft_precompiled_stopwords[] = {
#ifdef COMPILE_STOPWORDS_IN
......
......@@ -33,27 +33,74 @@ static int FT_STOPWORD_cmp(void* cmp_arg __attribute__((unused)),
(uchar *)w2->pos,w2->len,0);
}
int ft_init_stopwords(const char **sws)
/*
  Element destructor registered with the stopword tree.

  w       stopword element being handled by the tree
  action  tree callback phase; only free_free triggers any work here
  arg     unused

  The word buffer is released only when ft_stopword_file is set. In that
  mode the words inserted by ft_init_stopwords() are heap copies made with
  my_strdup_with_length(); otherwise the elements point at the entries of
  ft_precompiled_stopwords, which are not heap-allocated here and are left
  alone.
*/
static void FT_STOPWORD_free(FT_STOPWORD *w, TREE_FREE action,
                             void *arg __attribute__((unused)))
{
  if (action == free_free && ft_stopword_file)
    my_free(w->pos, MYF(0));
}
static int ft_add_stopword(const char *w)
{
FT_STOPWORD sw;
return !w ||
(((sw.len= (uint) strlen(sw.pos=w)) >= ft_min_word_len) &&
(tree_insert(stopwords3, &sw, 0)==NULL));
}
if(!stopwords3)
int ft_init_stopwords()
{
if (!stopwords3)
{
if(!(stopwords3=(TREE *)my_malloc(sizeof(TREE),MYF(0)))) return -1;
init_tree(stopwords3,0,0,sizeof(FT_STOPWORD),(qsort_cmp2)&FT_STOPWORD_cmp,0,
NULL, NULL);
if (!(stopwords3=(TREE *)my_malloc(sizeof(TREE),MYF(0))))
return -1;
init_tree(stopwords3,0,0,sizeof(FT_STOPWORD),(qsort_cmp2)&FT_STOPWORD_cmp,
0, (tree_element_free)&FT_STOPWORD_free, NULL);
}
if(!sws) return 0;
for(;*sws;sws++)
if (ft_stopword_file)
{
if( (sw.len= (uint) strlen(sw.pos=*sws)) < ft_min_word_len) continue;
if(!tree_insert(stopwords3, &sw, 0))
File fd;
my_off_t len;
byte *buffer, *start, *end;
FT_WORD w;
int err=-1;
if (!*ft_stopword_file)
return 0;
if ((fd=my_open(ft_stopword_file, O_RDONLY, MYF(MY_WME))) == -1)
return -1;
len=my_seek(fd, 0L, MY_SEEK_END, MYF(0));
my_seek(fd, 0L, MY_SEEK_SET, MYF(0));
if (!(start=buffer=my_malloc(len+1, MYF(MY_WME))))
{
my_close(fd, MYF(MY_WME));
return -1;
}
len=my_read(fd, buffer, len, MYF(MY_WME));
end=start+len;
while (ft_simple_get_word(&start, end, &w))
{
if (ft_add_stopword(my_strdup_with_length(w.pos, w.len, MYF(0))))
goto err1;
}
err=0;
err1:
my_free(buffer, MYF(0));
err0:
my_close(fd, MYF(MY_WME));
return err;
}
else
{
/* compatibility mode: to be removed */
char **sws=ft_precompiled_stopwords;
for (;*sws;sws++)
{
delete_tree(stopwords3); /* purecov: inspected */
return -1; /* purecov: inspected */
if (ft_add_stopword(*sws))
return -1;
}
}
return 0;
......@@ -72,7 +119,7 @@ void ft_free_stopwords()
{
if (stopwords3)
{
delete_tree(stopwords3); /* purecov: inspected */
delete_tree(stopwords3); /* purecov: inspected */
my_free((char*) stopwords3,MYF(0));
stopwords3=0;
}
......
......@@ -860,7 +860,7 @@ static int myisamchk(MI_CHECK *param, my_string filename)
else
{
if (share->fulltext_index)
ft_init_stopwords(ft_precompiled_stopwords); /* SerG */
ft_init_stopwords();
if (!(param->testflag & T_READONLY))
lock_type = F_WRLCK; /* table is changed */
......
......@@ -2072,8 +2072,8 @@ int main(int argc, char **argv)
#endif
if (opt_myisam_log)
(void) mi_log( 1 );
ft_init_stopwords(ft_precompiled_stopwords);
(void) mi_log(1);
ft_init_stopwords();
#ifdef __WIN__
if (!opt_console)
......@@ -2929,7 +2929,7 @@ enum options {
OPT_CONNECT_TIMEOUT, OPT_DELAYED_INSERT_TIMEOUT,
OPT_DELAYED_INSERT_LIMIT, OPT_DELAYED_QUEUE_SIZE,
OPT_FLUSH_TIME, OPT_FT_MIN_WORD_LEN,
OPT_FT_MAX_WORD_LEN, OPT_FT_MAX_WORD_LEN_FOR_SORT,
OPT_FT_MAX_WORD_LEN, OPT_FT_MAX_WORD_LEN_FOR_SORT, OPT_FT_STOPWORD_FILE,
OPT_INTERACTIVE_TIMEOUT, OPT_JOIN_BUFF_SIZE,
OPT_KEY_BUFFER_SIZE, OPT_LONG_QUERY_TIME,
OPT_LOWER_CASE_TABLE_NAMES, OPT_MAX_ALLOWED_PACKET,
......@@ -3415,7 +3415,8 @@ struct my_option my_long_options[] =
(gptr*) &max_system_variables.log_warnings, 0, GET_BOOL, NO_ARG, 0, 0, 0,
0, 0, 0},
{ "back_log", OPT_BACK_LOG,
"The number of outstanding connection requests MySQL can have. This comes into play when the main MySQL thread gets very many connection requests in a very short time.", (gptr*) &back_log, (gptr*) &back_log, 0, GET_ULONG,
"The number of outstanding connection requests MySQL can have. This comes into play when the main MySQL thread gets very many connection requests in a very short time.",
(gptr*) &back_log, (gptr*) &back_log, 0, GET_ULONG,
REQUIRED_ARG, 50, 1, 65535, 0, 1, 0 },
#ifdef HAVE_BERKELEY_DB
{ "bdb_cache_size", OPT_BDB_CACHE_SIZE,
......@@ -3468,9 +3469,13 @@ struct my_option my_long_options[] =
(gptr*) &ft_max_word_len, (gptr*) &ft_max_word_len, 0, GET_ULONG,
REQUIRED_ARG, HA_FT_MAXLEN, 10, HA_FT_MAXLEN, 0, 1, 0},
{ "ft_max_word_len_for_sort", OPT_FT_MAX_WORD_LEN_FOR_SORT,
"Undocumented", (gptr*) &ft_max_word_len_for_sort,
(gptr*) &ft_max_word_len_for_sort, 0, GET_ULONG, REQUIRED_ARG, 20, 4,
HA_FT_MAXLEN, 0, 1, 0},
"The maximum length of the word for repair_by_sorting. Longer words are included the slow way. The lower this value, the more words will be put in one sort bucket.",
(gptr*) &ft_max_word_len_for_sort, (gptr*) &ft_max_word_len_for_sort, 0, GET_ULONG,
REQUIRED_ARG, 20, 4, HA_FT_MAXLEN, 0, 1, 0},
{ "ft_stopword_file", OPT_FT_STOPWORD_FILE,
"Use stopwords from this file instead of built-in list.",
(gptr*) &ft_stopword_file, (gptr*) &ft_stopword_file, 0, GET_STR,
REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
#ifdef HAVE_INNOBASE_DB
{"innodb_mirrored_log_groups", OPT_INNODB_MIRRORED_LOG_GROUPS,
"Number of identical copies of log groups we keep for the database. Currently this should be set to 1.",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment