Bug#27287 extractvalue() (and updatexml()) extremely slow for large XML

Performance improvements made.
ExtractValue for large XML values is now much faster
(about 2000 times faster for 1 MB-long XML values).
parent 2c12ddc1
...@@ -2611,35 +2611,27 @@ typedef struct ...@@ -2611,35 +2611,27 @@ typedef struct
uint level; uint level;
String *pxml; // parsed XML String *pxml; // parsed XML
uint pos[MAX_LEVEL]; // Tag position stack uint pos[MAX_LEVEL]; // Tag position stack
uint parent; // Offset of the parent of the current node
} MY_XML_USER_DATA; } MY_XML_USER_DATA;
/* static bool
Find the parent node append_node(String *str, MY_XML_NODE *node)
SYNOPSIS
Find the parent node, i.e. a tag or attribute node on the given level.
RETURN
1 - success
0 - failure
*/
static uint xml_parent_tag(MY_XML_NODE *items, uint nitems, uint level)
{ {
if (!nitems) /*
return 0; If "str" doesn't have space for a new node,
it will allocate two times more space than it has had so far.
MY_XML_NODE *p, *last= &items[nitems-1]; (2*len+512) is a heuristic value,
for (p= last; p >= items; p--) which gave the best performance during tests.
{ The ideas behind this formula are:
if (p->level == level && - It allows to have a very small number of reallocs:
(p->type == MY_XML_NODE_TAG || about 10 reallocs on a 1Mb-long XML value.
p->type == MY_XML_NODE_ATTR)) - At the same time, it avoids excessive memory use.
{ */
return p - items; if (str->reserve(sizeof(MY_XML_NODE), 2 * str->length() + 512))
} return TRUE;
} str->q_append((const char*) node, sizeof(MY_XML_NODE));
return 0; return FALSE;
} }
...@@ -2661,19 +2653,17 @@ extern "C" int xml_enter(MY_XML_PARSER *st,const char *attr, size_t len); ...@@ -2661,19 +2653,17 @@ extern "C" int xml_enter(MY_XML_PARSER *st,const char *attr, size_t len);
int xml_enter(MY_XML_PARSER *st,const char *attr, size_t len) int xml_enter(MY_XML_PARSER *st,const char *attr, size_t len)
{ {
MY_XML_USER_DATA *data= (MY_XML_USER_DATA*)st->user_data; MY_XML_USER_DATA *data= (MY_XML_USER_DATA*)st->user_data;
MY_XML_NODE *nodes= (MY_XML_NODE*) data->pxml->ptr();
uint numnodes= data->pxml->length() / sizeof(MY_XML_NODE); uint numnodes= data->pxml->length() / sizeof(MY_XML_NODE);
uint parent= xml_parent_tag(nodes, numnodes, data->level - 1);
MY_XML_NODE node; MY_XML_NODE node;
node.parent= data->parent; // Set parent for the new node to old parent
data->parent= numnodes; // Remember current node as new parent
data->pos[data->level]= numnodes; data->pos[data->level]= numnodes;
node.level= data->level++; node.level= data->level++;
node.type= st->current_node_type; // TAG or ATTR node.type= st->current_node_type; // TAG or ATTR
node.beg= attr; node.beg= attr;
node.end= attr + len; node.end= attr + len;
node.parent= parent; return append_node(data->pxml, &node) ? MY_XML_ERROR : MY_XML_OK;
data->pxml->append((const char*) &node, sizeof(MY_XML_NODE));
return MY_XML_OK;
} }
...@@ -2694,18 +2684,14 @@ extern "C" int xml_value(MY_XML_PARSER *st,const char *attr, size_t len); ...@@ -2694,18 +2684,14 @@ extern "C" int xml_value(MY_XML_PARSER *st,const char *attr, size_t len);
int xml_value(MY_XML_PARSER *st,const char *attr, size_t len) int xml_value(MY_XML_PARSER *st,const char *attr, size_t len)
{ {
MY_XML_USER_DATA *data= (MY_XML_USER_DATA*)st->user_data; MY_XML_USER_DATA *data= (MY_XML_USER_DATA*)st->user_data;
MY_XML_NODE *nodes= (MY_XML_NODE*) data->pxml->ptr();
uint numnodes= data->pxml->length() / sizeof(MY_XML_NODE);
uint parent= xml_parent_tag(nodes, numnodes, data->level - 1);
MY_XML_NODE node; MY_XML_NODE node;
node.parent= data->parent; // Set parent for the new text node to old parent
node.level= data->level; node.level= data->level;
node.type= MY_XML_NODE_TEXT; node.type= MY_XML_NODE_TEXT;
node.beg= attr; node.beg= attr;
node.end= attr + len; node.end= attr + len;
node.parent= parent; return append_node(data->pxml, &node) ? MY_XML_ERROR : MY_XML_OK;
data->pxml->append((const char*) &node, sizeof(MY_XML_NODE));
return MY_XML_OK;
} }
...@@ -2730,6 +2716,7 @@ int xml_leave(MY_XML_PARSER *st,const char *attr, size_t len) ...@@ -2730,6 +2716,7 @@ int xml_leave(MY_XML_PARSER *st,const char *attr, size_t len)
data->level--; data->level--;
MY_XML_NODE *nodes= (MY_XML_NODE*) data->pxml->ptr(); MY_XML_NODE *nodes= (MY_XML_NODE*) data->pxml->ptr();
data->parent= nodes[data->parent].parent;
nodes+= data->pos[data->level]; nodes+= data->pos[data->level];
nodes->tagend= st->cur; nodes->tagend= st->cur;
...@@ -2760,6 +2747,7 @@ String *Item_xml_str_func::parse_xml(String *raw_xml, String *parsed_xml_buf) ...@@ -2760,6 +2747,7 @@ String *Item_xml_str_func::parse_xml(String *raw_xml, String *parsed_xml_buf)
p.flags= MY_XML_FLAG_RELATIVE_NAMES | MY_XML_FLAG_SKIP_TEXT_NORMALIZATION; p.flags= MY_XML_FLAG_RELATIVE_NAMES | MY_XML_FLAG_SKIP_TEXT_NORMALIZATION;
user_data.level= 0; user_data.level= 0;
user_data.pxml= parsed_xml_buf; user_data.pxml= parsed_xml_buf;
user_data.parent= 0;
my_xml_set_enter_handler(&p, xml_enter); my_xml_set_enter_handler(&p, xml_enter);
my_xml_set_value_handler(&p, xml_value); my_xml_set_value_handler(&p, xml_value);
my_xml_set_leave_handler(&p, xml_leave); my_xml_set_leave_handler(&p, xml_leave);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment