Commit a3f44515 authored by svoj@mysql.com's avatar svoj@mysql.com

WL#2575 - Fulltext: Parser plugin for FTS

Manual merge.
parent 70cc1a08
......@@ -28,7 +28,8 @@ SUBDIRS = . include @docs_dirs@ @zlib_dir@ @yassl_dir@ \
@sql_server@ scripts @man_dirs@ tests \
@mysql_se_plugins@ \
netware @libmysqld_dirs@ \
@bench_dirs@ support-files @tools_dirs@
@bench_dirs@ support-files @tools_dirs@ \
plugin
DIST_SUBDIRS = . include @docs_dirs@ zlib \
@readline_topdir@ sql-common \
......@@ -37,7 +38,8 @@ DIST_SUBDIRS = . include @docs_dirs@ zlib \
vio sql libmysql_r libmysql client scripts \
@man_dirs@ tests SSL\
BUILD netware os2 @libmysqld_dirs@\
@bench_dirs@ support-files server-tools tools
@bench_dirs@ support-files server-tools tools \
plugin
# Run these targets before any others, also make part of clean target,
# to make sure we create new links after a clean.
......
......@@ -2581,7 +2581,9 @@ AC_CONFIG_FILES(Makefile extra/Makefile mysys/Makefile dnl
cmd-line-utils/Makefile dnl
cmd-line-utils/libedit/Makefile dnl
zlib/Makefile dnl
cmd-line-utils/readline/Makefile)
cmd-line-utils/readline/Makefile dnl
plugin/Makefile dnl
plugin/fulltext/Makefile)
AC_CONFIG_COMMANDS([default], , test -z "$CONFIG_HEADERS" || echo timestamp > stamp-h)
AC_OUTPUT
......
......@@ -51,6 +51,7 @@
#define HA_OPEN_DELAY_KEY_WRITE 8 /* Don't update index */
#define HA_OPEN_ABORT_IF_CRASHED 16
#define HA_OPEN_FOR_REPAIR 32 /* open even if crashed */
#define HA_OPEN_FROM_SQL_LAYER 64
/* The following is parameter to ha_rkey() how to use key */
......@@ -246,6 +247,7 @@ enum ha_base_keytype {
#define HA_OPTION_DELAY_KEY_WRITE 64
#define HA_OPTION_NO_PACK_KEYS 128 /* Reserved for MySQL */
#define HA_OPTION_CREATE_FROM_ENGINE 256
#define HA_OPTION_RELIES_ON_SQL_LAYER 512
#define HA_OPTION_TEMP_COMPRESS_RECORD ((uint) 16384) /* set by isamchk */
#define HA_OPTION_READ_ONLY_DATA ((uint) 32768) /* Set by isamchk */
......@@ -256,6 +258,7 @@ enum ha_base_keytype {
#define HA_CREATE_TMP_TABLE 4
#define HA_CREATE_CHECKSUM 8
#define HA_CREATE_DELAY_KEY_WRITE 64
#define HA_CREATE_RELIES_ON_SQL_LAYER 128
/*
The following flags (OR-ed) are passed to handler::info() method.
......
......@@ -198,6 +198,7 @@ typedef struct st_mi_keydef /* Key definition with open & info */
uint16 maxlength; /* max length of (packed) key (auto) */
uint16 block_size; /* block_size (auto) */
uint32 version; /* For concurrent read/write */
uint32 ftparser_nr; /* distinct ftparser number */
HA_KEYSEG *seg,*end;
struct st_mysql_ftparser *parser; /* Fulltext [pre]parser */
......
......@@ -66,36 +66,50 @@ struct st_mysql_plugin
/* Parsing modes. Set in MYSQL_FTPARSER_PARAM::mode */
/*
The fast and simple mode. Parser is expected to return only those words that
go into the index. Stopwords or too short/long words should not be returned.
'boolean_info' argument of mysql_add_word() does not have to be set.
Fast and simple mode. This mode is used for indexing, and natural
language queries.
This mode is used for indexing, and natural language queries.
The parser is expected to return only those words that go into the
index. Stopwords or too short/long words should not be returned. The
'boolean_info' argument of mysql_add_word() does not have to be set.
*/
#define MYSQL_FTPARSER_SIMPLE_MODE 0
/*
The parser is not allowed to ignore words in this mode. Every word should
be returned, including stopwords and words that are too short or long.
'boolean_info' argument of mysql_add_word() does not have to be set.
Parse with stopwords mode. This mode is used in boolean searches for
"phrase matching."
This mode is used in boolean searches for "phrase matching."
The parser is not allowed to ignore words in this mode. Every word
should be returned, including stopwords and words that are too short
or long. The 'boolean_info' argument of mysql_add_word() does not
have to be set.
*/
#define MYSQL_FTPARSER_WITH_STOPWORDS 1
/*
Parse in boolean mode. The parser should provide a valid
MYSQL_FTPARSER_BOOLEAN_INFO structure in the 'boolean_info' argument
to mysql_add_word(). Usually that means that the parser should
recognize boolean operators in the parsing stream and set appropriate
fields in MYSQL_FTPARSER_BOOLEAN_INFO structure accordingly. As
for MYSQL_FTPARSER_WITH_STOPWORDS mode, no word should be ignored.
Parse in boolean mode. This mode is used to parse a boolean query string.
The parser should provide a valid MYSQL_FTPARSER_BOOLEAN_INFO
structure in the 'boolean_info' argument to mysql_add_word().
Usually that means that the parser should recognize boolean operators
in the parsing stream and set appropriate fields in
MYSQL_FTPARSER_BOOLEAN_INFO structure accordingly. As for
MYSQL_FTPARSER_WITH_STOPWORDS mode, no word should be ignored.
Instead, use FT_TOKEN_STOPWORD for the token type of such a word.
This mode is used to parse a boolean query string.
*/
#define MYSQL_FTPARSER_FULL_BOOLEAN_INFO 2
/*
Token types for boolean mode searching (used for the type member of
MYSQL_FTPARSER_BOOLEAN_INFO struct)
FT_TOKEN_EOF: End of data.
FT_TOKEN_WORD: Regular word.
FT_TOKEN_LEFT_PAREN: Left parenthesis (start of group/sub-expression).
FT_TOKEN_RIGHT_PAREN: Right parenthesis (end of group/sub-expression).
FT_TOKEN_STOPWORD: Stopword.
*/
enum enum_ft_token_type
{
FT_TOKEN_EOF= 0,
......@@ -110,8 +124,27 @@ enum enum_ft_token_type
boolean-mode metadata to the MySQL search engine for every word in
the search query. A valid instance of this structure must be filled
in by the plugin parser and passed as an argument in the call to
mysql_add_word (the function from structure MYSQL_FTPARSER_PARAM)
when a query is parsed in boolean mode.
mysql_add_word (the callback function in the MYSQL_FTPARSER_PARAM
structure) when a query is parsed in boolean mode.
type: The token type. Should be one of the enum_ft_token_type values.
yesno: Whether the word must be present for a match to occur:
>0 Must be present
<0 Must not be present
0 Neither; the word is optional but its presence increases the relevance
With the default settings of the ft_boolean_syntax system variable,
>0 corresponds to the '+' operator, <0 corresponds to the '-' operator,
and 0 means neither operator was used.
weight_adjust: A weighting factor that determines how much a match
for the word counts. Can be used to increase or decrease the word's
importance.
wasign: The sign of the weight_adjust value.
trunc: Corresponds to the '*' operator in the default setting of the
ft_boolean_syntax system variable.
*/
typedef struct st_mysql_ftparser_boolean_info
......@@ -129,48 +162,63 @@ typedef struct st_mysql_ftparser_boolean_info
/*
An argument of the full-text parser plugin. This structure is
filled by MySQL server and passed to the parsing function of the
filled in by MySQL server and passed to the parsing function of the
plugin as an in/out parameter.
mysql_parse: A pointer to the built-in parser implementation of the
server. It's set by the server and can be used by the parser plugin
to invoke the MySQL default parser. If plugin's role is to extract
textual data from .doc, .pdf or .xml content, it might extract
plaintext from the content, and then pass the text to the default
MySQL parser to be parsed. When mysql_parse is called, its param
argument should be given as the mysql_ftparam value.
mysql_add_word: A server callback to add a new word. When parsing
a document, the server sets this to point at a function that adds
the word to MySQL full-text index. When parsing a search query,
this function will add the new word to the list of words to search
for. When mysql_add_word is called, its param argument should be
given as the mysql_ftparam value. boolean_info can be NULL for all
cases except when mode is MYSQL_FTPARSER_FULL_BOOLEAN_INFO.
ftparser_state: A generic pointer. The plugin can set it to point
to information to be used internally for its own purposes.
mysql_ftparam: This is set by the server. It is passed as the first
argument to the mysql_parse or mysql_add_word callback. The plugin
should not modify it.
cs: Information about the character set of the document or query string.
doc: A pointer to the document or query string to be parsed.
length: Length of the document or query string, in bytes.
mode: The parsing mode. With boolean operators, with stopwords, or
nothing. See MYSQL_FTPARSER_* constants above.
*/
typedef struct st_mysql_ftparser_param
{
/*
A fallback pointer to the built-in parser implementation
of the server. It's set by the server and can be used
by the parser plugin to invoke the MySQL default parser.
If plugin's role is to extract textual data from .doc,
.pdf or .xml content, it might use the default MySQL parser
to parse the extracted plaintext string.
When calling it, pass the mysql_ftparam member as the param argument.
*/
int (*mysql_parse)(void *param, byte *doc, uint doc_len);
/*
A server callback to add a new word.
When parsing a document, the server sets this to point at
a function that adds the word to MySQL full-text index.
When parsing a search query, this function will
add the new word to the list of words to search for.
When calling it, pass the mysql_ftparam member as the param argument.
boolean_info can be NULL for all cases except
MYSQL_FTPARSER_FULL_BOOLEAN_INFO mode.
*/
int (*mysql_add_word)(void *param, byte *word, uint word_len,
MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info);
/*
A pointer to the parser local state. This is an inout parameter:
the plugin may point it at data it needs for its own purposes.
*/
void *ftparser_state;
/*
Set by the server. Passed back as the first argument of the
mysql_parse and mysql_add_word callbacks; the plugin should not
modify it.
*/
void *mysql_ftparam;
/* Character set of the document or the query */
CHARSET_INFO *cs;
/* A pointer to the document or the query to be parsed */
byte *doc;
/* Document/query length, in bytes */
uint length;
/*
Parsing mode: with boolean operators, with stopwords, or nothing.
See MYSQL_FTPARSER_* constants above.
*/
int mode;
} MYSQL_FTPARSER_PARAM;
/*
Full-text parser descriptor.
interface_version is, e.g., MYSQL_FTPARSER_INTERFACE_VERSION.
The parsing, initialization, and deinitialization functions are
invoked per SQL statement for which the parser is used.
*/
struct st_mysql_ftparser
{
int interface_version;
......
SUBDIRS= fulltext
INCLUDES= -I$(top_builddir)/include
noinst_LTLIBRARIES= libmypluglib.la
libmypluglib_la_SOURCES= plugin_example.c
libmypluglib_la_LDFLAGS= -module
/*
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
#include <my_global.h>
#include <m_string.h>
#include <m_ctype.h>
#include <plugin.h>
/*
Simple full-text parser plugin that acts as a replacement for the
built-in full-text parser:
- All non-whitespace characters are significant and are interpreted as
"word characters."
- Whitespace characters are those recognized by isspace(): space, tab,
CR, LF, and (in the "C" locale) also vertical tab and form feed.
- There is no minimum word length. Non-whitespace sequences of one
character or longer are words.
- Stopwords are used in non-boolean mode, not used in boolean mode.
*/
/*
simple_parser interface functions:
Plugin declaration functions:
- simple_parser_plugin_init()
- simple_parser_plugin_deinit()
Parser descriptor functions:
- simple_parser_parse()
- simple_parser_init()
- simple_parser_deinit()
*/
/*
  Plugin-level init hook, run at server start or plugin installation.

  SYNOPSIS
    simple_parser_plugin_init()

  DESCRIPTION
    Registered as the init function of the plugin declaration.
    There is nothing to set up, so it does nothing.

  RETURN VALUE
    0                    success
    1                    failure (cannot happen)
*/

static int simple_parser_plugin_init(void)
{
  return 0;
}
/*
  Plugin-level deinit hook, run at server shutdown or plugin
  deinstallation.

  SYNOPSIS
    simple_parser_plugin_deinit()

  DESCRIPTION
    Registered as the deinit function of the plugin declaration.
    There is nothing to release, so it does nothing.

  RETURN VALUE
    0                    success
    1                    failure (cannot happen)
*/

static int simple_parser_plugin_deinit(void)
{
  return 0;
}
/*
  Parser init hook (the "parser init function" of the parser descriptor).

  SYNOPSIS
    simple_parser_init()
      param              parsing context supplied by the server (unused)

  DESCRIPTION
    Per the plugin API notes, parser init functions are invoked per SQL
    statement for which the parser is used. This parser needs no
    per-statement state, so the hook does nothing.

  RETURN VALUE
    0                    success
    1                    failure (cannot happen)
*/

static int simple_parser_init(MYSQL_FTPARSER_PARAM *param)
{
  return 0;
}
/*
  Parser deinit hook (the "parser deinit function" of the parser
  descriptor).

  SYNOPSIS
    simple_parser_deinit()
      param              parsing context supplied by the server (unused)

  DESCRIPTION
    Per the plugin API notes, parser deinit functions are invoked per SQL
    statement for which the parser is used. simple_parser_init() acquires
    nothing, so there is nothing to release and the hook does nothing.

  RETURN VALUE
    0                    success
    1                    failure (cannot happen)
*/

static int simple_parser_deinit(MYSQL_FTPARSER_PARAM *param)
{
  return 0;
}
/*
  Hand one word over to the server.

  SYNOPSIS
    add_word()
      param              parsing context of the plugin
      word               start of the word
      len                word length

  DESCRIPTION
    Calls the server's mysql_add_word callback for the word. In
    MYSQL_FTPARSER_FULL_BOOLEAN_INFO mode the callback also receives
    boolean metadata describing a plain FT_TOKEN_WORD token; in every
    other mode no metadata is passed. The server adds the word to a
    full-text index when parsing for indexing, or to the list of search
    terms when parsing a search string.
*/

static void add_word(MYSQL_FTPARSER_PARAM *param, char *word, size_t len)
{
  MYSQL_FTPARSER_BOOLEAN_INFO bool_info=
    { FT_TOKEN_WORD, 0, 0, 0, 0, ' ', 0 };
  /* Metadata is handed to the server only when it asked for boolean info. */
  MYSQL_FTPARSER_BOOLEAN_INFO *info_arg= 0;

  if (param->mode == MYSQL_FTPARSER_FULL_BOOLEAN_INFO)
    info_arg= &bool_info;

  param->mysql_add_word(param->mysql_ftparam, word, len, info_arg);
}
/*
  Parse a document or a search query.

  SYNOPSIS
    simple_parser_parse()
      param              parsing context; param->doc / param->length
                         describe the text, param->mode the call mode

  DESCRIPTION
    This is the main plugin function which is called to parse
    a document or a search query. The call mode is set in
    param->mode. The text is split on isspace() characters and every
    non-empty run between separators is passed to add_word(), which
    forwards it to the server.

  RETURN VALUE
    0                    always
*/

int simple_parser_parse(MYSQL_FTPARSER_PARAM *param)
{
  char *end, *start, *docend= param->doc + param->length;

  for (end= start= param->doc;; end++)
  {
    if (end == docend)
    {
      /* End of text: flush the word in progress, if any. */
      if (end > start)
        add_word(param, start, end - start);
      break;
    }
    /*
      isspace() requires a value representable as unsigned char (or EOF).
      Without the cast, bytes >= 0x80 are negative where char is signed,
      which is undefined behaviour.
    */
    else if (isspace((unsigned char) *end))
    {
      if (end > start)
        add_word(param, start, end - start);
      /* The next word, if any, starts after this separator. */
      start= end + 1;
    }
  }
  return(0);
}
/*
Plugin type-specific descriptor

Full-text parser descriptor for simple_parser: the interface version
followed by the parse, init and deinit callbacks defined in this file.
The plugin library descriptor points at this structure.
*/
static struct st_mysql_ftparser simple_parser_descriptor=
{
MYSQL_FTPARSER_INTERFACE_VERSION, /* interface version */
simple_parser_parse, /* parsing function */
simple_parser_init, /* parser init function */
simple_parser_deinit /* parser deinit function */
};
/*
Plugin library descriptor

Declares the single plugin in this library: a full-text parser plugin
named "simple_parser" whose type-specific descriptor is
simple_parser_descriptor, with plugin-level init/deinit hooks.
*/
mysql_declare_plugin
{
MYSQL_FTPARSER_PLUGIN, /* type */
&simple_parser_descriptor, /* descriptor */
"simple_parser", /* name */
"MySQL AB", /* author */
"Simple Full-Text Parser", /* description */
simple_parser_plugin_init, /* init function (when loaded) */
simple_parser_plugin_deinit /* deinit function (when unloaded) */
}
mysql_declare_plugin_end;
......@@ -297,7 +297,7 @@ err:
int ha_myisam::open(const char *name, int mode, uint test_if_locked)
{
uint i;
if (!(file=mi_open(name, mode, test_if_locked)))
if (!(file=mi_open(name, mode, test_if_locked | HA_OPEN_FROM_SQL_LAYER)))
return (my_errno ? my_errno : -1);
if (test_if_locked & (HA_OPEN_IGNORE_IF_LOCKED | HA_OPEN_TMP_TABLE))
......@@ -1473,6 +1473,8 @@ int ha_myisam::create(const char *name, register TABLE *table_arg,
pos=table_arg->key_info;
for (i=0; i < share->keys ; i++, pos++)
{
if (pos->flags & HA_USES_PARSER)
create_flags|= HA_CREATE_RELIES_ON_SQL_LAYER;
keydef[i].flag= (pos->flags & (HA_NOSAME | HA_FULLTEXT | HA_SPATIAL));
keydef[i].key_alg= pos->algorithm == HA_KEY_ALG_UNDEF ?
(pos->flags & HA_SPATIAL ? HA_KEY_ALG_RTREE : HA_KEY_ALG_BTREE) :
......
......@@ -21,12 +21,31 @@
char *opt_plugin_dir_ptr;
char opt_plugin_dir[FN_REFLEN];
/*
Human-readable plugin type names, indexed by plugin type number
(plugin->type); used in error messages and plugin listings.
NOTE(review): the elements are plain pointers, not arrays. If a
length macro such as STRING_WITH_LEN derives the length from sizeof
(TODO confirm), applying it to an element yields the pointer size,
not the string length.
*/
const char *plugin_type_names[]=
{
"UDF",
"STORAGE ENGINE",
"FTPARSER"
};
static const char *plugin_interface_version_sym=
"_mysql_plugin_interface_version_";
static const char *plugin_declarations_sym= "_mysql_plugin_declarations_";
static int min_plugin_interface_version= 0x0000;
/* Note that 'int version' must be the first field of every plugin
sub-structure (plugin->info).

Lowest type-specific interface version accepted for each plugin type,
indexed by plugin type number. plugin_add() rejects a plugin whose
version (read as the leading int of plugin->info) is below its entry.
Entries are presumably in the same order as plugin_type_names
(UDF, STORAGE ENGINE, FTPARSER) -- TODO confirm.
*/
static int min_plugin_info_interface_version[MYSQL_MAX_PLUGIN_TYPE_NUM]=
{
0x0000,
0x0000,
0x0000
};
/*
Type-specific interface version the server currently implements for
each plugin type, indexed by plugin type number. plugin_add() rejects
a plugin whose version, shifted right by 8 bits, is greater than the
entry here shifted the same way.
*/
static int cur_plugin_info_interface_version[MYSQL_MAX_PLUGIN_TYPE_NUM]=
{
0x0000, /* UDF: not implemented */
0x0000, /* STORAGE ENGINE: not implemented */
MYSQL_FTPARSER_INTERFACE_VERSION
};
static DYNAMIC_ARRAY plugin_dl_array;
static DYNAMIC_ARRAY plugin_array;
static HASH plugin_hash[MYSQL_MAX_PLUGIN_TYPE_NUM];
......@@ -51,6 +70,27 @@ static struct st_plugin_dl *plugin_dl_find(LEX_STRING *dl)
}
/*
  Store a copy of a library descriptor in plugin_dl_array.

  SYNOPSIS
    plugin_dl_insert_or_reuse()
      plugin_dl          descriptor to copy into the array

  DESCRIPTION
    The first array element whose ref_count is zero is overwritten with
    the descriptor. If there is no such element, the descriptor is
    appended to the array.

  RETURN VALUE
    pointer to the array element now holding the copy
    0 if insert_dynamic() reported a failure
*/

static st_plugin_dl *plugin_dl_insert_or_reuse(struct st_plugin_dl *plugin_dl)
{
  uint idx= 0;
  struct st_plugin_dl *slot;
  DBUG_ENTER("plugin_dl_insert_or_reuse");

  /* Prefer recycling an unreferenced element over growing the array. */
  while (idx < plugin_dl_array.elements)
  {
    slot= dynamic_element(&plugin_dl_array, idx, struct st_plugin_dl *);
    if (slot->ref_count == 0)
    {
      memcpy(slot, plugin_dl, sizeof(*slot));
      DBUG_RETURN(slot);
    }
    idx++;
  }

  if (insert_dynamic(&plugin_dl_array, (gptr)plugin_dl))
    DBUG_RETURN(0);

  /* The freshly appended copy is the last element. */
  slot= dynamic_element(&plugin_dl_array, plugin_dl_array.elements - 1,
                        struct st_plugin_dl *);
  DBUG_RETURN(slot);
}
static st_plugin_dl *plugin_dl_add(LEX_STRING *dl, int report)
{
#ifdef HAVE_DLOPEN
......@@ -144,7 +184,7 @@ static st_plugin_dl *plugin_dl_add(LEX_STRING *dl, int report)
&dummy_errors);
plugin_dl.dl.str[plugin_dl.dl.length]= 0;
/* Add this dll to array */
if (insert_dynamic(&plugin_dl_array, (gptr)&plugin_dl))
if (! (tmp= plugin_dl_insert_or_reuse(&plugin_dl)))
{
dlclose(plugin_dl.handle);
my_free(plugin_dl.dl.str, MYF(0));
......@@ -154,8 +194,7 @@ static st_plugin_dl *plugin_dl_add(LEX_STRING *dl, int report)
sql_print_error(ER(ER_OUTOFMEMORY), sizeof(struct st_plugin_dl));
DBUG_RETURN(0);
}
DBUG_RETURN(dynamic_element(&plugin_dl_array, plugin_dl_array.elements - 1,
struct st_plugin_dl *));
DBUG_RETURN(tmp);
#else
DBUG_ENTER("plugin_dl_add");
if (report & REPORT_TO_USER)
......@@ -236,7 +275,7 @@ my_bool plugin_is_ready(LEX_STRING *name, int type)
struct st_plugin_int *plugin_lock(LEX_STRING *name, int type)
{
struct st_plugin_int *rc;
DBUG_ENTER("plugin_find");
DBUG_ENTER("plugin_lock");
rw_wrlock(&THR_LOCK_plugin);
if ((rc= plugin_find_internal(name, type)))
{
......@@ -250,6 +289,27 @@ struct st_plugin_int *plugin_lock(LEX_STRING *name, int type)
}
/*
  Store a copy of a plugin entry in plugin_array.

  SYNOPSIS
    plugin_insert_or_reuse()
      plugin             entry to copy into the array

  DESCRIPTION
    The first array element whose state is PLUGIN_IS_FREED is
    overwritten with the entry. If there is no such element, the entry
    is appended to the array.

  RETURN VALUE
    pointer to the array element now holding the copy
    0 if insert_dynamic() reported a failure
*/

static st_plugin_int *plugin_insert_or_reuse(struct st_plugin_int *plugin)
{
  uint idx= 0;
  struct st_plugin_int *slot;
  DBUG_ENTER("plugin_insert_or_reuse");

  /* Prefer recycling a freed element over growing the array. */
  while (idx < plugin_array.elements)
  {
    slot= dynamic_element(&plugin_array, idx, struct st_plugin_int *);
    if (slot->state == PLUGIN_IS_FREED)
    {
      memcpy(slot, plugin, sizeof(*slot));
      DBUG_RETURN(slot);
    }
    idx++;
  }

  if (insert_dynamic(&plugin_array, (gptr)plugin))
    DBUG_RETURN(0);

  /* The freshly appended copy is the last element. */
  slot= dynamic_element(&plugin_array, plugin_array.elements - 1,
                        struct st_plugin_int *);
  DBUG_RETURN(slot);
}
static my_bool plugin_add(LEX_STRING *name, LEX_STRING *dl, int report)
{
struct st_plugin_int tmp;
......@@ -275,12 +335,28 @@ static my_bool plugin_add(LEX_STRING *name, LEX_STRING *dl, int report)
(const uchar *)plugin->name,
name_len))
{
struct st_plugin_int *tmp_plugin_ptr;
if (*(int*)plugin->info <
min_plugin_info_interface_version[plugin->type] ||
((*(int*)plugin->info) >> 8) >
(cur_plugin_info_interface_version[plugin->type] >> 8))
{
char buf[256];
strxnmov(buf, sizeof(buf) - 1, "API version for ",
plugin_type_names[plugin->type], " plugin is too different",
NullS);
if (report & REPORT_TO_USER)
my_error(ER_CANT_OPEN_LIBRARY, MYF(0), dl->str, 0, buf);
if (report & REPORT_TO_LOG)
sql_print_error(ER(ER_CANT_OPEN_LIBRARY), dl->str, 0, buf);
goto err;
}
tmp.plugin= plugin;
tmp.name.str= (char *)plugin->name;
tmp.name.length= name_len;
tmp.ref_count= 0;
tmp.state= PLUGIN_IS_UNINITIALIZED;
if (insert_dynamic(&plugin_array, (gptr)&tmp))
if (! (tmp_plugin_ptr= plugin_insert_or_reuse(&tmp)))
{
if (report & REPORT_TO_USER)
my_error(ER_OUTOFMEMORY, MYF(0), sizeof(struct st_plugin_int));
......@@ -288,14 +364,9 @@ static my_bool plugin_add(LEX_STRING *name, LEX_STRING *dl, int report)
sql_print_error(ER(ER_OUTOFMEMORY), sizeof(struct st_plugin_int));
goto err;
}
if (my_hash_insert(&plugin_hash[plugin->type],
(byte*)dynamic_element(&plugin_array,
plugin_array.elements - 1,
struct st_plugin_int *)))
if (my_hash_insert(&plugin_hash[plugin->type], (byte*)tmp_plugin_ptr))
{
struct st_plugin_int *tmp_plugin= dynamic_element(&plugin_array,
plugin_array.elements - 1, struct st_plugin_int *);
tmp_plugin->state= PLUGIN_IS_FREED;
tmp_plugin_ptr->state= PLUGIN_IS_FREED;
if (report & REPORT_TO_USER)
my_error(ER_OUTOFMEMORY, MYF(0), sizeof(struct st_plugin_int));
if (report & REPORT_TO_LOG)
......@@ -332,7 +403,7 @@ static void plugin_del(LEX_STRING *name)
void plugin_unlock(struct st_plugin_int *plugin)
{
DBUG_ENTER("plugin_release");
DBUG_ENTER("plugin_unlock");
rw_wrlock(&THR_LOCK_plugin);
DBUG_ASSERT(plugin && plugin->ref_count);
plugin->ref_count--;
......
......@@ -53,6 +53,7 @@ struct st_plugin_int
extern char *opt_plugin_dir_ptr;
extern char opt_plugin_dir[FN_REFLEN];
extern const char *plugin_type_names[];
extern int plugin_init(void);
extern void plugin_load(void);
extern void plugin_free(void);
......
......@@ -135,22 +135,7 @@ static my_bool show_plugins(THD *thd, st_plugin_int *plugin,
DBUG_ASSERT(0);
}
switch (plug->type)
{
case MYSQL_UDF_PLUGIN:
table->field[3]->store(STRING_WITH_LEN("UDF"), cs);
break;
case MYSQL_STORAGE_ENGINE_PLUGIN:
table->field[3]->store(STRING_WITH_LEN("STORAGE"), cs);
break;
case MYSQL_FTPARSER_PLUGIN:
table->field[3]->store(STRING_WITH_LEN("FTPARSER"), cs);
break;
default:
table->field[3]->store(STRING_WITH_LEN("UNKNOWN"), cs);
break;
}
table->field[3]->store(STRING_WITH_LEN(plugin_type_names[plug->type]), cs);
table->field[4]->store(version_buf,
make_version_string(version_buf, sizeof(version_buf),
*(uint *)plug->info), cs);
......
......@@ -4427,7 +4427,9 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name,
key_name,
key_info->algorithm,
test(key_info->flags & HA_GENERATED_KEY),
key_parts));
key_parts,
key_info->flags & HA_USES_PARSER ?
&key_info->parser->name : 0));
}
{
Key *key;
......
......@@ -284,28 +284,29 @@ static int ftb_parse_query_internal(void *param, byte *query, uint len)
static void _ftb_parse_query(FTB *ftb, byte *query, uint len,
struct st_mysql_ftparser *parser)
{
MYSQL_FTPARSER_PARAM param;
MYSQL_FTPARSER_PARAM *param;
MY_FTB_PARAM ftb_param;
DBUG_ENTER("_ftb_parse_query");
DBUG_ASSERT(parser);
if (ftb->state != UNINITIALIZED)
return;
DBUG_VOID_RETURN;
ftb_param.ftb= ftb;
ftb_param.depth= 0;
ftb_param.ftbe= ftb->root;
ftb_param.up_quot= 0;
param.mysql_parse= ftb_parse_query_internal;
param.mysql_add_word= ftb_query_add_word;
param.ftparser_state= 0;
param.mysql_ftparam= (void *)&ftb_param;
param.cs= ftb->charset;
param.doc= query;
param.length= len;
param.mode= MYSQL_FTPARSER_FULL_BOOLEAN_INFO;
parser->parse(&param);
if (! (param= ftparser_call_initializer(ftb->info, ftb->keynr)))
DBUG_VOID_RETURN;
param->mysql_parse= ftb_parse_query_internal;
param->mysql_add_word= ftb_query_add_word;
param->mysql_ftparam= (void *)&ftb_param;
param->cs= ftb->charset;
param->doc= query;
param->length= len;
param->mode= MYSQL_FTPARSER_FULL_BOOLEAN_INFO;
parser->parse(param);
DBUG_VOID_RETURN;
}
......@@ -629,30 +630,30 @@ static int ftb_check_phrase_internal(void *param, byte *document, uint len)
1 is returned if phrase found, 0 else.
*/
static int _ftb_check_phrase(const byte *document, uint len,
FTB_EXPR *ftbe, CHARSET_INFO *cs,
struct st_mysql_ftparser *parser)
static int _ftb_check_phrase(FTB *ftb, const byte *document, uint len,
FTB_EXPR *ftbe, struct st_mysql_ftparser *parser)
{
MY_FTB_PHRASE_PARAM ftb_param;
MYSQL_FTPARSER_PARAM param;
MYSQL_FTPARSER_PARAM *param;
DBUG_ENTER("_ftb_check_phrase");
DBUG_ASSERT(parser);
if (! (param= ftparser_call_initializer(ftb->info, ftb->keynr)))
DBUG_RETURN(0);
ftb_param.phrase= ftbe->phrase;
ftb_param.document= ftbe->document;
ftb_param.cs= cs;
ftb_param.cs= ftb->charset;
ftb_param.phrase_length= list_length(ftbe->phrase);
ftb_param.document_length= 1;
ftb_param.match= 0;
param.mysql_parse= ftb_check_phrase_internal;
param.mysql_add_word= ftb_phrase_add_word;
param.ftparser_state= 0;
param.mysql_ftparam= (void *)&ftb_param;
param.cs= cs;
param.doc= (byte *)document;
param.length= len;
param.mode= MYSQL_FTPARSER_WITH_STOPWORDS;
parser->parse(&param);
param->mysql_parse= ftb_check_phrase_internal;
param->mysql_add_word= ftb_phrase_add_word;
param->mysql_ftparam= (void *)&ftb_param;
param->cs= ftb->charset;
param->doc= (byte *)document;
param->length= len;
param->mode= MYSQL_FTPARSER_WITH_STOPWORDS;
parser->parse(param);
DBUG_RETURN(ftb_param.match ? 1 : 0);
}
......@@ -696,8 +697,8 @@ static void _ftb_climb_the_tree(FTB *ftb, FTB_WORD *ftbw, FT_SEG_ITERATOR *ftsi_
{
if (!ftsi.pos)
continue;
not_found = ! _ftb_check_phrase(ftsi.pos, ftsi.len,
ftbe, ftb->charset, parser);
not_found = ! _ftb_check_phrase(ftb, ftsi.pos, ftsi.len,
ftbe, parser);
}
if (not_found) break;
} /* ftbe->quot */
......@@ -861,7 +862,7 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length)
FT_SEG_ITERATOR ftsi, ftsi2;
my_off_t docid=ftb->info->lastpos;
MY_FTB_FIND_PARAM ftb_param;
MYSQL_FTPARSER_PARAM param;
MYSQL_FTPARSER_PARAM *param;
struct st_mysql_ftparser *parser= ftb->keynr == NO_SUCH_KEY ?
&ft_default_parser :
ftb->info->s->keyinfo[ftb->keynr].parser;
......@@ -870,6 +871,8 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length)
return -2.0;
if (!ftb->queue.elements)
return 0;
if (! (param= ftparser_call_initializer(ftb->info, ftb->keynr)))
return 0;
if (ftb->state != INDEX_SEARCH && docid <= ftb->lastpos)
{
......@@ -894,20 +897,20 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length)
ftb_param.ftb= ftb;
ftb_param.ftsi= &ftsi2;
param.mysql_parse= ftb_find_relevance_parse;
param.mysql_add_word= ftb_find_relevance_add_word;
param.ftparser_state= 0;
param.mysql_ftparam= (void *)&ftb_param;
param.cs= ftb->charset;
param.mode= MYSQL_FTPARSER_SIMPLE_MODE;
while (_mi_ft_segiterator(&ftsi))
{
if (!ftsi.pos)
continue;
param.doc= (byte *)ftsi.pos;
param.length= ftsi.len;
parser->parse(&param);
/* Since subsequent call to _ftb_check_phrase overwrites param elements,
it must be reinitialized at each iteration _inside_ the loop. */
param->mysql_parse= ftb_find_relevance_parse;
param->mysql_add_word= ftb_find_relevance_add_word;
param->mysql_ftparam= (void *)&ftb_param;
param->cs= ftb->charset;
param->mode= MYSQL_FTPARSER_SIMPLE_MODE;
param->doc= (byte *)ftsi.pos;
param->length= ftsi.len;
parser->parse(param);
}
ftbe=ftb->root;
if (ftbe->docid[1]==docid && ftbe->cur_weight>0 &&
......
......@@ -210,6 +210,8 @@ FT_INFO *ft_init_nlq_search(MI_INFO *info, uint keynr, byte *query,
FT_DOC *dptr;
FT_INFO *dlist=NULL;
my_off_t saved_lastpos=info->lastpos;
struct st_mysql_ftparser *parser;
MYSQL_FTPARSER_PARAM *ftparser_param;
DBUG_ENTER("ft_init_nlq_search");
/* black magic ON */
......@@ -223,6 +225,9 @@ FT_INFO *ft_init_nlq_search(MI_INFO *info, uint keynr, byte *query,
aio.keynr=keynr;
aio.charset=info->s->keyinfo[keynr].seg->charset;
aio.keybuff=info->lastkey+info->s->base.max_key_length;
parser= info->s->keyinfo[keynr].parser;
if (! (ftparser_param= ftparser_call_initializer(info, keynr)))
goto err;
bzero(&wtree,sizeof(wtree));
......@@ -230,7 +235,7 @@ FT_INFO *ft_init_nlq_search(MI_INFO *info, uint keynr, byte *query,
NULL, NULL);
ft_parse_init(&wtree, aio.charset);
if (ft_parse(&wtree, query, query_len, 0, info->s->keyinfo[keynr].parser))
if (ft_parse(&wtree, query, query_len, 0, parser, ftparser_param))
goto err;
if (tree_walk(&wtree, (tree_walk_action)&walk_and_match, &aio,
......@@ -250,7 +255,7 @@ FT_INFO *ft_init_nlq_search(MI_INFO *info, uint keynr, byte *query,
if (!(*info->read_record)(info,docid,record))
{
info->update|= HA_STATE_AKTIV;
_mi_ft_parse(&wtree, info, keynr, record,1);
_mi_ft_parse(&wtree, info, keynr, record, 1, ftparser_param);
}
}
delete_queue(&best);
......
......@@ -284,22 +284,110 @@ static int ft_parse_internal(void *param, byte *doc, uint doc_len)
int ft_parse(TREE *wtree, byte *doc, int doclen, my_bool with_alloc,
struct st_mysql_ftparser *parser)
struct st_mysql_ftparser *parser,
MYSQL_FTPARSER_PARAM *param)
{
MYSQL_FTPARSER_PARAM param;
MY_FT_PARSER_PARAM my_param;
DBUG_ENTER("ft_parse");
DBUG_ASSERT(parser);
my_param.wtree= wtree;
my_param.with_alloc= with_alloc;
param.mysql_parse= ft_parse_internal;
param.mysql_add_word= ft_add_word;
param.ftparser_state= 0;
param.mysql_ftparam= &my_param;
param.cs= wtree->custom_arg;
param.doc= doc;
param.length= doclen;
param.mode= MYSQL_FTPARSER_SIMPLE_MODE;
DBUG_RETURN(parser->parse(&param));
param->mysql_parse= ft_parse_internal;
param->mysql_add_word= ft_add_word;
param->mysql_ftparam= &my_param;
param->cs= wtree->custom_arg;
param->doc= doc;
param->length= doclen;
param->mode= MYSQL_FTPARSER_SIMPLE_MODE;
DBUG_RETURN(parser->parse(param));
}
/*
Return the parser parameter slot for a key, initializing it on first use.

SYNOPSIS
ftparser_call_initializer()
info     table handle
keynr    key number, or NO_SUCH_KEY to select the built-in parser

DESCRIPTION
On the first call for a handle, allocates info->ftparser_param as a
zero-filled array with info->s->ftparsers elements. If the share has
not been numbered yet, the fulltext keys are numbered first: keys that
use the same parser get the same ftparser_nr, and number 0 is reserved
for the built-in parser selected by NO_SUCH_KEY.
If the selected slot is not yet marked as initialized, it is marked
and the parser's init function (if any) is called on it.

RETURN VALUE
pointer to the slot    success
0                      allocation failed, or parser->init returned non-zero
*/
MYSQL_FTPARSER_PARAM *ftparser_call_initializer(MI_INFO *info, uint keynr)
{
uint32 ftparser_nr;
struct st_mysql_ftparser *parser;
if (! info->ftparser_param)
{
/* info->ftparser_param can not be zero after the initialization,
because it always includes built-in fulltext parser. And built-in
parser can be called even if the table has no fulltext indexes and
no varchar/text fields. */
if (! info->s->ftparsers)
{
/* It's ok that modification to shared structure is done w/o mutex
locks, because all threads would set the same variables to the
same values. */
/* ftparsers starts at 1 because slot 0 belongs to the built-in parser. */
uint i, j, keys= info->s->state.header.keys, ftparsers= 1;
for (i= 0; i < keys; i++)
{
MI_KEYDEF *keyinfo= &info->s->keyinfo[i];
if (keyinfo->flag & HA_FULLTEXT)
{
/* Search the earlier keys (j < i) for a fulltext key with the same
parser. Reaching j == i means none exists. */
for (j= 0;; j++)
{
if (j == i)
{
/* No earlier key shares this parser: take the next free number. */
keyinfo->ftparser_nr= ftparsers++;
break;
}
if (info->s->keyinfo[j].flag & HA_FULLTEXT &&
keyinfo->parser == info->s->keyinfo[j].parser)
{
/* Same parser as an earlier key: share its number (and its slot). */
keyinfo->ftparser_nr= info->s->keyinfo[j].ftparser_nr;
break;
}
}
}
}
info->s->ftparsers= ftparsers;
}
/* One zero-filled slot per distinct parser number. */
info->ftparser_param= (MYSQL_FTPARSER_PARAM *)
my_malloc(sizeof(MYSQL_FTPARSER_PARAM) *
info->s->ftparsers, MYF(MY_WME|MY_ZEROFILL));
if (! info->ftparser_param)
return 0;
}
if (keynr == NO_SUCH_KEY)
{
ftparser_nr= 0;
parser= &ft_default_parser;
}
else
{
ftparser_nr= info->s->keyinfo[keynr].ftparser_nr;
parser= info->s->keyinfo[keynr].parser;
}
if (! info->ftparser_param[ftparser_nr].mysql_add_word)
{
/* Note, that mysql_add_word is used here as a flag:
mysql_add_word == 0 - parser is not initialized
mysql_add_word != 0 - parser is initialized, or no
initialization needed. */
info->ftparser_param[ftparser_nr].mysql_add_word= (void *)1;
/* NOTE(review): the flag stays set when init fails, so later calls
skip init and return the slot -- confirm this is intended. */
if (parser->init && parser->init(&info->ftparser_param[ftparser_nr]))
return 0;
}
return &info->ftparser_param[ftparser_nr];
}
/*
  Deinitialize the parser slots that were initialized for this handle.

  SYNOPSIS
    ftparser_call_deinitializer()
      info               table handle

  DESCRIPTION
    For every fulltext key whose slot is marked as initialized (non-zero
    mysql_add_word), calls the parser's deinit function (if any) and
    clears the mark. Keys that share a parser share a slot, so the slot
    is deinitialized once. Does nothing if info->ftparser_param has not
    been allocated. The array itself is not freed here.

    NOTE(review): only slots of fulltext keys are visited. Slot 0, used
    for NO_SUCH_KEY, is never reset by this function.
*/

void ftparser_call_deinitializer(MI_INFO *info)
{
  uint i, keys= info->s->state.header.keys;

  if (! info->ftparser_param)
    return;

  for (i= 0; i < keys; i++)
  {
    MI_KEYDEF *keyinfo= &info->s->keyinfo[i];
    MYSQL_FTPARSER_PARAM *ftparser_param;

    /*
      ftparser_nr is assigned only for fulltext keys, so check the flag
      before using it as an index. Forming the slot address from an
      unassigned number could point outside the array.
    */
    if (! (keyinfo->flag & HA_FULLTEXT))
      continue;

    ftparser_param= &info->ftparser_param[keyinfo->ftparser_nr];
    if (! ftparser_param->mysql_add_word)
      continue;

    if (keyinfo->parser->deinit)
      keyinfo->parser->deinit(ftparser_param);
    ftparser_param->mysql_add_word= 0;
  }
}
......@@ -96,7 +96,8 @@ uint _mi_ft_segiterator(register FT_SEG_ITERATOR *ftsi)
/* parses a document i.e. calls ft_parse for every keyseg */
uint _mi_ft_parse(TREE *parsed, MI_INFO *info, uint keynr,
const byte *record, my_bool with_alloc)
const byte *record, my_bool with_alloc,
MYSQL_FTPARSER_PARAM *param)
{
FT_SEG_ITERATOR ftsi;
struct st_mysql_ftparser *parser;
......@@ -109,7 +110,8 @@ uint _mi_ft_parse(TREE *parsed, MI_INFO *info, uint keynr,
while (_mi_ft_segiterator(&ftsi))
{
if (ftsi.pos)
if (ft_parse(parsed, (byte *)ftsi.pos, ftsi.len, with_alloc, parser))
if (ft_parse(parsed, (byte *)ftsi.pos, ftsi.len, with_alloc, parser,
param))
DBUG_RETURN(1);
}
DBUG_RETURN(0);
......@@ -118,10 +120,12 @@ uint _mi_ft_parse(TREE *parsed, MI_INFO *info, uint keynr,
FT_WORD * _mi_ft_parserecord(MI_INFO *info, uint keynr, const byte *record)
{
TREE ptree;
MYSQL_FTPARSER_PARAM *param;
DBUG_ENTER("_mi_ft_parserecord");
if (! (param= ftparser_call_initializer(info, keynr)))
DBUG_RETURN(NULL);
bzero((char*) &ptree, sizeof(ptree));
if (_mi_ft_parse(&ptree, info, keynr, record,0))
if (_mi_ft_parse(&ptree, info, keynr, record, 0, param))
DBUG_RETURN(NULL);
DBUG_RETURN(ft_linearize(&ptree));
......
......@@ -119,10 +119,12 @@ void _mi_ft_segiterator_dummy_init(const byte *, uint, FT_SEG_ITERATOR *);
uint _mi_ft_segiterator(FT_SEG_ITERATOR *);
void ft_parse_init(TREE *, CHARSET_INFO *);
int ft_parse(TREE *, byte *, int, my_bool, struct st_mysql_ftparser *parser);
int ft_parse(TREE *, byte *, int, my_bool, struct st_mysql_ftparser *parser,
MYSQL_FTPARSER_PARAM *param);
FT_WORD * ft_linearize(TREE *);
FT_WORD * _mi_ft_parserecord(MI_INFO *, uint, const byte *);
uint _mi_ft_parse(TREE *, MI_INFO *, uint, const byte *, my_bool);
uint _mi_ft_parse(TREE *, MI_INFO *, uint, const byte *, my_bool,
MYSQL_FTPARSER_PARAM *param);
FT_INFO *ft_init_nlq_search(MI_INFO *, uint, byte *, uint, uint, byte *);
FT_INFO *ft_init_boolean_search(MI_INFO *, uint, byte *, uint, CHARSET_INFO *);
......@@ -142,4 +144,6 @@ void ft_boolean_close_search(FT_INFO *);
float ft_boolean_get_relevance(FT_INFO *);
my_off_t ft_boolean_get_docid(FT_INFO *);
void ft_boolean_reinit_search(FT_INFO *);
/*
  Fulltext parser plugin init/deinit hooks.

  ftparser_call_initializer() returns a pointer into info->ftparser_param
  for the given key, or 0 on failure (for instance when the parser's init
  callback returns non-zero); callers must check the result.

  ftparser_call_deinitializer() calls the optional deinit callback for each
  fulltext key whose parameter slot is marked as initialized (non-zero
  mysql_add_word) and then clears that mark.
*/
extern MYSQL_FTPARSER_PARAM *ftparser_call_initializer(MI_INFO *info,
uint keynr);
extern void ftparser_call_deinitializer(MI_INFO *info);
......@@ -105,6 +105,11 @@ int mi_close(register MI_INFO *info)
my_free((gptr) info->s,MYF(0));
}
pthread_mutex_unlock(&THR_LOCK_myisam);
if (info->ftparser_param)
{
my_free((gptr)info->ftparser_param, MYF(0));
info->ftparser_param= 0;
}
if (info->dfile >= 0 && my_close(info->dfile,MYF(0)))
error = my_errno;
......
......@@ -183,6 +183,8 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
}
if (flags & HA_CREATE_DELAY_KEY_WRITE)
options|= HA_OPTION_DELAY_KEY_WRITE;
if (flags & HA_CREATE_RELIES_ON_SQL_LAYER)
options|= HA_OPTION_RELIES_ON_SQL_LAYER;
packed=(packed+7)/8;
if (pack_reclength != INT_MAX32)
......
......@@ -21,7 +21,7 @@
isamdatabase.
*/
#include "myisamdef.h"
#include "ftdefs.h"
/* lock table by F_UNLCK, F_RDLCK or F_WRLCK */
......@@ -55,6 +55,7 @@ int mi_lock_database(MI_INFO *info, int lock_type)
{
switch (lock_type) {
case F_UNLCK:
ftparser_call_deinitializer(info);
if (info->lock_type == F_RDLCK)
count= --share->r_locks;
else
......
......@@ -141,12 +141,20 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
~(HA_OPTION_PACK_RECORD | HA_OPTION_PACK_KEYS |
HA_OPTION_COMPRESS_RECORD | HA_OPTION_READ_ONLY_DATA |
HA_OPTION_TEMP_COMPRESS_RECORD | HA_OPTION_CHECKSUM |
HA_OPTION_TMP_TABLE | HA_OPTION_DELAY_KEY_WRITE))
HA_OPTION_TMP_TABLE | HA_OPTION_DELAY_KEY_WRITE |
HA_OPTION_RELIES_ON_SQL_LAYER))
{
DBUG_PRINT("error",("wrong options: 0x%lx", share->options));
my_errno=HA_ERR_OLD_FILE;
goto err;
}
if ((share->options & HA_OPTION_RELIES_ON_SQL_LAYER) &&
! (open_flags & HA_OPEN_FROM_SQL_LAYER))
{
DBUG_PRINT("error", ("table cannot be openned from non-sql layer"));
my_errno= HA_ERR_UNSUPPORTED;
goto err;
}
/* Don't call realpath() if the name can't be a link */
if (!strcmp(name_buff, org_name) ||
my_readlink(index_name, org_name, MYF(0)) == -1)
......@@ -418,6 +426,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
pos->flag=0;
pos++;
}
share->ftparsers= 0;
}
disk_pos_assert(disk_pos + share->base.fields *MI_COLUMNDEF_SIZE, end_pos);
......@@ -1051,6 +1060,7 @@ char *mi_keydef_read(char *ptr, MI_KEYDEF *keydef)
keydef->underflow_block_length=keydef->block_length/3;
keydef->version = 0; /* Not saved */
keydef->parser = &ft_default_parser;
keydef->ftparser_nr = 0;
return ptr;
}
......
......@@ -191,6 +191,7 @@ typedef struct st_mi_isam_share { /* Shared between opens */
ulong state_diff_length;
uint rec_reflength; /* rec_reflength in use now */
uint unique_name_length;
uint32 ftparsers; /* Number of distinct ftparsers + 1 */
File kfile; /* Shared keyfile */
File data_file; /* Shared data file */
int mode; /* mode of file on open */
......@@ -231,6 +232,7 @@ struct st_myisam_info {
/* accumulate indexfile changes between write's */
TREE *bulk_insert;
DYNAMIC_ARRAY *ft1_to_ft2; /* used only in ft1->ft2 conversion */
MYSQL_FTPARSER_PARAM *ftparser_param; /* share info between init/deinit */
char *filename; /* parameter to open filename */
uchar *buff, /* Temp area for key */
*lastkey,*lastkey2; /* Last used search key */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment