Commit 8fb685c2 authored by bar@mysql.com's avatar bar@mysql.com

Bug#18201: XML: ExtractValue works even if the xml fragment is not well-formed xml

Problem:
- ExtractValue silently returned NULL if a malformed XML value was passed.
- In some cases the "unexpected END-OF-INPUT" error was not detected, and
  a non-NULL result could be returned for a bad XML value.

Fix:
- Added warning messages to make the user aware of why NULL was returned.
- The previously missing "unexpected END-OF-INPUT" error is now reported.
parent 6f5999e9
...@@ -132,7 +132,7 @@ xb1 xc1 ...@@ -132,7 +132,7 @@ xb1 xc1
SELECT extractValue(@xml,'/a//@x[2]'); SELECT extractValue(@xml,'/a//@x[2]');
extractValue(@xml,'/a//@x[2]') extractValue(@xml,'/a//@x[2]')
xb2 xc2 xb2 xc2
SET @xml='<a><b>b1</b><b>b2</b><c><b>c1b1</b><b>c1b2</b></c><c><b>c2b1</c></b>/a>'; SET @xml='<a><b>b1</b><b>b2</b><c><b>c1b1</b><b>c1b2</b></c><c><b>c2b1</c></b></a>';
SELECT extractValue(@xml,'//b[1]'); SELECT extractValue(@xml,'//b[1]');
extractValue(@xml,'//b[1]') extractValue(@xml,'//b[1]')
b1 c1b1 c2b1 b1 c1b1 c2b1
...@@ -612,6 +612,36 @@ extractvalue('<a>Jack</a>' collate latin1_bin,'/a[contains(../a,"j")]') ...@@ -612,6 +612,36 @@ extractvalue('<a>Jack</a>' collate latin1_bin,'/a[contains(../a,"j")]')
select ExtractValue('<tag1><![CDATA[test]]></tag1>','/tag1'); select ExtractValue('<tag1><![CDATA[test]]></tag1>','/tag1');
ExtractValue('<tag1><![CDATA[test]]></tag1>','/tag1') ExtractValue('<tag1><![CDATA[test]]></tag1>','/tag1')
test test
select extractValue('<a>a','/a');
extractValue('<a>a','/a')
NULL
Warnings:
Warning 1504 Incorrect XML value: 'parse error at line 1 pos 5: unexpected END-OF-INPUT'
select extractValue('<a>a<','/a');
extractValue('<a>a<','/a')
NULL
Warnings:
Warning 1504 Incorrect XML value: 'parse error at line 1 pos 6: END-OF-INPUT unexpected (ident or '/' wanted)'
select extractValue('<a>a</','/a');
extractValue('<a>a</','/a')
NULL
Warnings:
Warning 1504 Incorrect XML value: 'parse error at line 1 pos 7: END-OF-INPUT unexpected (ident wanted)'
select extractValue('<a>a</a','/a');
extractValue('<a>a</a','/a')
NULL
Warnings:
Warning 1504 Incorrect XML value: 'parse error at line 1 pos 8: END-OF-INPUT unexpected ('>' wanted)'
select extractValue('<a>a</a></b>','/a');
extractValue('<a>a</a></b>','/a')
NULL
Warnings:
Warning 1504 Incorrect XML value: 'parse error at line 1 pos 12: '</b>' unexpected (END-OF-INPUT wanted)'
select extractValue('<a b=>a</a>','/a');
extractValue('<a b=>a</a>','/a')
NULL
Warnings:
Warning 1504 Incorrect XML value: 'parse error at line 1 pos 7: '>' unexpected (ident or string wanted)'
select extractValue('<e>1</e>','position()'); select extractValue('<e>1</e>','position()');
ERROR HY000: XPATH syntax error: '' ERROR HY000: XPATH syntax error: ''
select extractValue('<e>1</e>','last()'); select extractValue('<e>1</e>','last()');
......
...@@ -53,7 +53,7 @@ SELECT extractValue(@xml,'/a//@x'); ...@@ -53,7 +53,7 @@ SELECT extractValue(@xml,'/a//@x');
SELECT extractValue(@xml,'/a//@x[1]'); SELECT extractValue(@xml,'/a//@x[1]');
SELECT extractValue(@xml,'/a//@x[2]'); SELECT extractValue(@xml,'/a//@x[2]');
SET @xml='<a><b>b1</b><b>b2</b><c><b>c1b1</b><b>c1b2</b></c><c><b>c2b1</c></b>/a>'; SET @xml='<a><b>b1</b><b>b2</b><c><b>c1b1</b><b>c1b2</b></c><c><b>c2b1</c></b></a>';
SELECT extractValue(@xml,'//b[1]'); SELECT extractValue(@xml,'//b[1]');
SELECT extractValue(@xml,'/descendant::b[1]'); SELECT extractValue(@xml,'/descendant::b[1]');
...@@ -284,6 +284,17 @@ select extractvalue('<a>Jack</a>' collate latin1_bin,'/a[contains(../a,"j")]'); ...@@ -284,6 +284,17 @@ select extractvalue('<a>Jack</a>' collate latin1_bin,'/a[contains(../a,"j")]');
# #
select ExtractValue('<tag1><![CDATA[test]]></tag1>','/tag1'); select ExtractValue('<tag1><![CDATA[test]]></tag1>','/tag1');
#
# Bug#18201: XML: ExtractValue works even if the xml fragment
# is not well-formed xml
#
select extractValue('<a>a','/a');
select extractValue('<a>a<','/a');
select extractValue('<a>a</','/a');
select extractValue('<a>a</a','/a');
select extractValue('<a>a</a></b>','/a');
select extractValue('<a b=>a</a>','/a');
# #
# Bug #18171 XML: ExtractValue: the XPath position() # Bug #18171 XML: ExtractValue: the XPath position()
# function crashes the server! # function crashes the server!
......
...@@ -2563,7 +2563,17 @@ String *Item_xml_str_func::parse_xml(String *raw_xml, String *parsed_xml_buf) ...@@ -2563,7 +2563,17 @@ String *Item_xml_str_func::parse_xml(String *raw_xml, String *parsed_xml_buf)
xml_enter(&p, raw_xml->ptr(), 0); xml_enter(&p, raw_xml->ptr(), 0);
/* Execute XML parser */ /* Execute XML parser */
rc= my_xml_parse(&p, raw_xml->ptr(), raw_xml->length()); if ((rc= my_xml_parse(&p, raw_xml->ptr(), raw_xml->length())) != MY_XML_OK)
{
char buf[128];
my_snprintf(buf, sizeof(buf)-1, "parse error at line %d pos %d: %s",
my_xml_error_lineno(&p) + 1,
my_xml_error_pos(&p) + 1,
my_xml_error_string(&p));
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
ER_WRONG_VALUE,
ER(ER_WRONG_VALUE), "XML", buf);
}
my_xml_parser_free(&p); my_xml_parser_free(&p);
return rc == MY_XML_OK ? parsed_xml_buf : 0; return rc == MY_XML_OK ? parsed_xml_buf : 0;
......
...@@ -43,7 +43,7 @@ static const char *lex2str(int lex) ...@@ -43,7 +43,7 @@ static const char *lex2str(int lex)
{ {
switch(lex) switch(lex)
{ {
case MY_XML_EOF: return "EOF"; case MY_XML_EOF: return "END-OF-INPUT";
case MY_XML_STRING: return "STRING"; case MY_XML_STRING: return "STRING";
case MY_XML_IDENT: return "IDENT"; case MY_XML_IDENT: return "IDENT";
case MY_XML_CDATA: return "CDATA"; case MY_XML_CDATA: return "CDATA";
...@@ -195,8 +195,13 @@ static int my_xml_leave(MY_XML_PARSER *p, const char *str, uint slen) ...@@ -195,8 +195,13 @@ static int my_xml_leave(MY_XML_PARSER *p, const char *str, uint slen)
if (str && (slen != glen)) if (str && (slen != glen))
{ {
mstr(s,str,sizeof(s)-1,slen); mstr(s,str,sizeof(s)-1,slen);
mstr(g,e+1,sizeof(g)-1,glen), if (glen)
sprintf(p->errstr,"'</%s>' unexpected ('</%s>' wanted)",s,g); {
mstr(g,e+1,sizeof(g)-1,glen),
sprintf(p->errstr,"'</%s>' unexpected ('</%s>' wanted)",s,g);
}
else
sprintf(p->errstr,"'</%s>' unexpected (END-OF-INPUT wanted)", s);
return MY_XML_ERROR; return MY_XML_ERROR;
} }
...@@ -247,7 +252,7 @@ int my_xml_parse(MY_XML_PARSER *p,const char *str, uint len) ...@@ -247,7 +252,7 @@ int my_xml_parse(MY_XML_PARSER *p,const char *str, uint len)
{ {
if (MY_XML_IDENT != (lex=my_xml_scan(p,&a))) if (MY_XML_IDENT != (lex=my_xml_scan(p,&a)))
{ {
sprintf(p->errstr,"1: %s unexpected (ident wanted)",lex2str(lex)); sprintf(p->errstr,"%s unexpected (ident wanted)",lex2str(lex));
return MY_XML_ERROR; return MY_XML_ERROR;
} }
if (MY_XML_OK != my_xml_leave(p,a.beg,(uint) (a.end-a.beg))) if (MY_XML_OK != my_xml_leave(p,a.beg,(uint) (a.end-a.beg)))
...@@ -275,7 +280,7 @@ int my_xml_parse(MY_XML_PARSER *p,const char *str, uint len) ...@@ -275,7 +280,7 @@ int my_xml_parse(MY_XML_PARSER *p,const char *str, uint len)
} }
else else
{ {
sprintf(p->errstr,"3: %s unexpected (ident or '/' wanted)", sprintf(p->errstr,"%s unexpected (ident or '/' wanted)",
lex2str(lex)); lex2str(lex));
return MY_XML_ERROR; return MY_XML_ERROR;
} }
...@@ -297,7 +302,7 @@ int my_xml_parse(MY_XML_PARSER *p,const char *str, uint len) ...@@ -297,7 +302,7 @@ int my_xml_parse(MY_XML_PARSER *p,const char *str, uint len)
} }
else else
{ {
sprintf(p->errstr,"4: %s unexpected (ident or string wanted)", sprintf(p->errstr,"%s unexpected (ident or string wanted)",
lex2str(lex)); lex2str(lex));
return MY_XML_ERROR; return MY_XML_ERROR;
} }
...@@ -325,7 +330,7 @@ int my_xml_parse(MY_XML_PARSER *p,const char *str, uint len) ...@@ -325,7 +330,7 @@ int my_xml_parse(MY_XML_PARSER *p,const char *str, uint len)
{ {
if (lex != MY_XML_QUESTION) if (lex != MY_XML_QUESTION)
{ {
sprintf(p->errstr,"6: %s unexpected ('?' wanted)",lex2str(lex)); sprintf(p->errstr,"%s unexpected ('?' wanted)",lex2str(lex));
return MY_XML_ERROR; return MY_XML_ERROR;
} }
if (MY_XML_OK != my_xml_leave(p,NULL,0)) if (MY_XML_OK != my_xml_leave(p,NULL,0))
...@@ -341,7 +346,7 @@ int my_xml_parse(MY_XML_PARSER *p,const char *str, uint len) ...@@ -341,7 +346,7 @@ int my_xml_parse(MY_XML_PARSER *p,const char *str, uint len)
if (lex != MY_XML_GT) if (lex != MY_XML_GT)
{ {
sprintf(p->errstr,"5: %s unexpected ('>' wanted)",lex2str(lex)); sprintf(p->errstr,"%s unexpected ('>' wanted)",lex2str(lex));
return MY_XML_ERROR; return MY_XML_ERROR;
} }
} }
...@@ -359,6 +364,12 @@ int my_xml_parse(MY_XML_PARSER *p,const char *str, uint len) ...@@ -359,6 +364,12 @@ int my_xml_parse(MY_XML_PARSER *p,const char *str, uint len)
} }
} }
} }
if (p->attr[0])
{
sprintf(p->errstr,"unexpected END-OF-INPUT");
return MY_XML_ERROR;
}
return MY_XML_OK; return MY_XML_OK;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment