Commit 3010775e authored by unknown's avatar unknown

Bug#18201: XML: ExtractValue works even if the xml

fragment is not well-formed xml

Problem:
- ExtractValue silently returned NULL if a malformed XML value was passed.
- In some cases the "unexpected END-OF-INPUT" error was not detected, and
  a non-NULL result could be returned for a bad XML value.

Fix:
- Adding warning messages, to make the user aware of why NULL was returned.
- The previously missed "unexpected END-OF-INPUT" error is now reported.


mysql-test/r/xml.result:
  - Fixing XML syntax error in old test
  - Adding test cases.
mysql-test/t/xml.test:
  - Fixing XML syntax error in old test
  - Adding test cases.
sql/item_xmlfunc.cc:
  Produce a warning in case of an XML syntax error,
  instead of silently returning NULL.
strings/xml.c:
  - Making error messages better looking and clearer:
  It is important because now they're seen in SHOW WARNINGS
  (previously they were used only for debugging purposes).
  - Adding "unexpected END-OF-INPUT" error if, after scanning the
    closing tag for the root element, some input is left
    (previously this error was ignored by mistake).
parent 3ef01486
......@@ -132,7 +132,7 @@ xb1 xc1
SELECT extractValue(@xml,'/a//@x[2]');
extractValue(@xml,'/a//@x[2]')
xb2 xc2
SET @xml='<a><b>b1</b><b>b2</b><c><b>c1b1</b><b>c1b2</b></c><c><b>c2b1</c></b>/a>';
SET @xml='<a><b>b1</b><b>b2</b><c><b>c1b1</b><b>c1b2</b></c><c><b>c2b1</c></b></a>';
SELECT extractValue(@xml,'//b[1]');
extractValue(@xml,'//b[1]')
b1 c1b1 c2b1
......@@ -612,6 +612,36 @@ extractvalue('<a>Jack</a>' collate latin1_bin,'/a[contains(../a,"j")]')
select ExtractValue('<tag1><![CDATA[test]]></tag1>','/tag1');
ExtractValue('<tag1><![CDATA[test]]></tag1>','/tag1')
test
select extractValue('<a>a','/a');
extractValue('<a>a','/a')
NULL
Warnings:
Warning 1504 Incorrect XML value: 'parse error at line 1 pos 5: unexpected END-OF-INPUT'
select extractValue('<a>a<','/a');
extractValue('<a>a<','/a')
NULL
Warnings:
Warning 1504 Incorrect XML value: 'parse error at line 1 pos 6: END-OF-INPUT unexpected (ident or '/' wanted)'
select extractValue('<a>a</','/a');
extractValue('<a>a</','/a')
NULL
Warnings:
Warning 1504 Incorrect XML value: 'parse error at line 1 pos 7: END-OF-INPUT unexpected (ident wanted)'
select extractValue('<a>a</a','/a');
extractValue('<a>a</a','/a')
NULL
Warnings:
Warning 1504 Incorrect XML value: 'parse error at line 1 pos 8: END-OF-INPUT unexpected ('>' wanted)'
select extractValue('<a>a</a></b>','/a');
extractValue('<a>a</a></b>','/a')
NULL
Warnings:
Warning 1504 Incorrect XML value: 'parse error at line 1 pos 12: '</b>' unexpected (END-OF-INPUT wanted)'
select extractValue('<a b=>a</a>','/a');
extractValue('<a b=>a</a>','/a')
NULL
Warnings:
Warning 1504 Incorrect XML value: 'parse error at line 1 pos 7: '>' unexpected (ident or string wanted)'
select extractValue('<e>1</e>','position()');
ERROR HY000: XPATH syntax error: ''
select extractValue('<e>1</e>','last()');
......
......@@ -53,7 +53,7 @@ SELECT extractValue(@xml,'/a//@x');
SELECT extractValue(@xml,'/a//@x[1]');
SELECT extractValue(@xml,'/a//@x[2]');
SET @xml='<a><b>b1</b><b>b2</b><c><b>c1b1</b><b>c1b2</b></c><c><b>c2b1</c></b>/a>';
SET @xml='<a><b>b1</b><b>b2</b><c><b>c1b1</b><b>c1b2</b></c><c><b>c2b1</c></b></a>';
SELECT extractValue(@xml,'//b[1]');
SELECT extractValue(@xml,'/descendant::b[1]');
......@@ -284,6 +284,17 @@ select extractvalue('<a>Jack</a>' collate latin1_bin,'/a[contains(../a,"j")]');
#
select ExtractValue('<tag1><![CDATA[test]]></tag1>','/tag1');
#
# Bug#18201: XML: ExtractValue works even if the xml fragment
# is not well-formed xml
#
select extractValue('<a>a','/a');
select extractValue('<a>a<','/a');
select extractValue('<a>a</','/a');
select extractValue('<a>a</a','/a');
select extractValue('<a>a</a></b>','/a');
select extractValue('<a b=>a</a>','/a');
#
# Bug #18171 XML: ExtractValue: the XPath position()
# function crashes the server!
......
......@@ -2563,7 +2563,17 @@ String *Item_xml_str_func::parse_xml(String *raw_xml, String *parsed_xml_buf)
xml_enter(&p, raw_xml->ptr(), 0);
/* Execute XML parser */
rc= my_xml_parse(&p, raw_xml->ptr(), raw_xml->length());
if ((rc= my_xml_parse(&p, raw_xml->ptr(), raw_xml->length())) != MY_XML_OK)
{
char buf[128];
my_snprintf(buf, sizeof(buf)-1, "parse error at line %d pos %d: %s",
my_xml_error_lineno(&p) + 1,
my_xml_error_pos(&p) + 1,
my_xml_error_string(&p));
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
ER_WRONG_VALUE,
ER(ER_WRONG_VALUE), "XML", buf);
}
my_xml_parser_free(&p);
return rc == MY_XML_OK ? parsed_xml_buf : 0;
......
......@@ -43,7 +43,7 @@ static const char *lex2str(int lex)
{
switch(lex)
{
case MY_XML_EOF: return "EOF";
case MY_XML_EOF: return "END-OF-INPUT";
case MY_XML_STRING: return "STRING";
case MY_XML_IDENT: return "IDENT";
case MY_XML_CDATA: return "CDATA";
......@@ -195,8 +195,13 @@ static int my_xml_leave(MY_XML_PARSER *p, const char *str, uint slen)
if (str && (slen != glen))
{
mstr(s,str,sizeof(s)-1,slen);
mstr(g,e+1,sizeof(g)-1,glen),
sprintf(p->errstr,"'</%s>' unexpected ('</%s>' wanted)",s,g);
if (glen)
{
mstr(g,e+1,sizeof(g)-1,glen),
sprintf(p->errstr,"'</%s>' unexpected ('</%s>' wanted)",s,g);
}
else
sprintf(p->errstr,"'</%s>' unexpected (END-OF-INPUT wanted)", s);
return MY_XML_ERROR;
}
......@@ -247,7 +252,7 @@ int my_xml_parse(MY_XML_PARSER *p,const char *str, uint len)
{
if (MY_XML_IDENT != (lex=my_xml_scan(p,&a)))
{
sprintf(p->errstr,"1: %s unexpected (ident wanted)",lex2str(lex));
sprintf(p->errstr,"%s unexpected (ident wanted)",lex2str(lex));
return MY_XML_ERROR;
}
if (MY_XML_OK != my_xml_leave(p,a.beg,(uint) (a.end-a.beg)))
......@@ -275,7 +280,7 @@ int my_xml_parse(MY_XML_PARSER *p,const char *str, uint len)
}
else
{
sprintf(p->errstr,"3: %s unexpected (ident or '/' wanted)",
sprintf(p->errstr,"%s unexpected (ident or '/' wanted)",
lex2str(lex));
return MY_XML_ERROR;
}
......@@ -297,7 +302,7 @@ int my_xml_parse(MY_XML_PARSER *p,const char *str, uint len)
}
else
{
sprintf(p->errstr,"4: %s unexpected (ident or string wanted)",
sprintf(p->errstr,"%s unexpected (ident or string wanted)",
lex2str(lex));
return MY_XML_ERROR;
}
......@@ -325,7 +330,7 @@ int my_xml_parse(MY_XML_PARSER *p,const char *str, uint len)
{
if (lex != MY_XML_QUESTION)
{
sprintf(p->errstr,"6: %s unexpected ('?' wanted)",lex2str(lex));
sprintf(p->errstr,"%s unexpected ('?' wanted)",lex2str(lex));
return MY_XML_ERROR;
}
if (MY_XML_OK != my_xml_leave(p,NULL,0))
......@@ -341,7 +346,7 @@ int my_xml_parse(MY_XML_PARSER *p,const char *str, uint len)
if (lex != MY_XML_GT)
{
sprintf(p->errstr,"5: %s unexpected ('>' wanted)",lex2str(lex));
sprintf(p->errstr,"%s unexpected ('>' wanted)",lex2str(lex));
return MY_XML_ERROR;
}
}
......@@ -359,6 +364,12 @@ int my_xml_parse(MY_XML_PARSER *p,const char *str, uint len)
}
}
}
if (p->attr[0])
{
sprintf(p->errstr,"unexpected END-OF-INPUT");
return MY_XML_ERROR;
}
return MY_XML_OK;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment