Commit 1fa56ad1 authored by Fred Drake's avatar Fred Drake

Test cases for TAL.HTMLParser module.

parent 8680ca94
"""Test suite for nsgmllib.py."""
import sys
import utils
import unittest
from TAL import HTMLParser
class EventCollector(HTMLParser.HTMLParser):
def __init__(self):
self.events = []
self.append = self.events.append
HTMLParser.HTMLParser.__init__(self)
def get_events(self):
# Normalize the list of events so that buffer artefacts don't
# separate runs of contiguous characters.
L = []
prevtype = None
for event in self.events:
type = event[0]
if type == prevtype == "data":
L[-1] = ("data", L[-1][1] + event[1])
else:
L.append(event)
prevtype = type
self.events = L
return L
# structure markup
def finish_starttag(self, tag, attrs):
self.append(("starttag", tag, attrs))
def finish_endtag(self, tag):
self.append(("endtag", tag))
# all other markup
def handle_charref(self, data):
self.append(("charref", data))
def handle_data(self, data):
self.append(("data", data))
def handle_decl(self, data):
self.append(("decl", data))
def handle_entityref(self, data):
self.append(("entityref", data))
def handle_pi(self, data):
self.append(("pi", data))
class HTMLParserTestCase(unittest.TestCase):
def _run_check(self, source, events):
parser = EventCollector()
parser.feed(source)
parser.close()
assert parser.get_events() == events, parser.get_events()
def check_processing_instruction_only(self):
self._run_check("<?processing instruction>", [
("pi", "processing instruction"),
])
def check_simple_html(self):
self._run_check("""
<!DOCTYPE html PUBLIC 'foo'>
<html>&entity;&#32;
<img src='bar' ismap>sample
text
</html>
""", [
("data", "\n"),
("decl", "DOCTYPE html PUBLIC 'foo'"),
("data", "\n"),
("starttag", "html", []),
("entityref", "entity"),
("charref", "32"),
("data", "\n"),
("starttag", "img", [("src", "bar"), ("ismap", "ismap")]),
("data", "sample\ntext\n"),
("endtag", "html"),
("data", "\n"),
])
def check_bad_nesting(self):
self._run_check("<a><b></a></b>", [
("starttag", "a", []),
("starttag", "b", []),
("endtag", "a"),
("endtag", "b"),
])
def check_attr_syntax(self):
output = [
("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", "e")])
]
self._run_check("""<a b='v' c="v" d=v e>""", output)
self._run_check("""<a b = 'v' c = "v" d = v e>""", output)
self._run_check("""<a\nb\n=\n'v'\nc\n=\n"v"\nd\n=\nv\ne>""", output)
self._run_check("""<a\tb\t=\t'v'\tc\t=\t"v"\td\t=\tv\te>""", output)
def check_attr_values(self):
self._run_check("""<a b='xxx\n\txxx' c="yyy\t\nyyy" d='\txyz\n'>""",
[("starttag", "a", [("b", "xxx\n\txxx"),
("c", "yyy\t\nyyy"),
("d", "\txyz\n")])
])
self._run_check("""<a b='' c="" d=>""", [
("starttag", "a", [("b", ""), ("c", ""), ("d", "")]),
])
def check_attr_entity_replacement(self):
# we expect entities *not* to be replaced by HTLMParser!
self._run_check("""<a b='&amp;&gt;&lt;&quot;&apos;'>""", [
("starttag", "a", [("b", "&><\"'")]),
])
def check_attr_funky_names(self):
self._run_check("""<a a.b='v' c:d=v e-f=v>""", [
("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")]),
])
# Support for the Zope regression test framework:
def test_suite():
suite = unittest.TestSuite()
suite.addTest(unittest.makeSuite(HTMLParserTestCase, "check_"))
return suite
if __name__ == "__main__":
errs = utils.run_suite(test_suite())
sys.exit(errs and 1 or 0)
"""Test suite for nsgmllib.py."""
import sys
import utils
import unittest
from TAL import HTMLParser
class EventCollector(HTMLParser.HTMLParser):
def __init__(self):
self.events = []
self.append = self.events.append
HTMLParser.HTMLParser.__init__(self)
def get_events(self):
# Normalize the list of events so that buffer artefacts don't
# separate runs of contiguous characters.
L = []
prevtype = None
for event in self.events:
type = event[0]
if type == prevtype == "data":
L[-1] = ("data", L[-1][1] + event[1])
else:
L.append(event)
prevtype = type
self.events = L
return L
# structure markup
def finish_starttag(self, tag, attrs):
self.append(("starttag", tag, attrs))
def finish_endtag(self, tag):
self.append(("endtag", tag))
# all other markup
def handle_charref(self, data):
self.append(("charref", data))
def handle_data(self, data):
self.append(("data", data))
def handle_decl(self, data):
self.append(("decl", data))
def handle_entityref(self, data):
self.append(("entityref", data))
def handle_pi(self, data):
self.append(("pi", data))
class HTMLParserTestCase(unittest.TestCase):
def _run_check(self, source, events):
parser = EventCollector()
parser.feed(source)
parser.close()
assert parser.get_events() == events, parser.get_events()
def check_processing_instruction_only(self):
self._run_check("<?processing instruction>", [
("pi", "processing instruction"),
])
def check_simple_html(self):
self._run_check("""
<!DOCTYPE html PUBLIC 'foo'>
<html>&entity;&#32;
<img src='bar' ismap>sample
text
</html>
""", [
("data", "\n"),
("decl", "DOCTYPE html PUBLIC 'foo'"),
("data", "\n"),
("starttag", "html", []),
("entityref", "entity"),
("charref", "32"),
("data", "\n"),
("starttag", "img", [("src", "bar"), ("ismap", "ismap")]),
("data", "sample\ntext\n"),
("endtag", "html"),
("data", "\n"),
])
def check_bad_nesting(self):
self._run_check("<a><b></a></b>", [
("starttag", "a", []),
("starttag", "b", []),
("endtag", "a"),
("endtag", "b"),
])
def check_attr_syntax(self):
output = [
("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", "e")])
]
self._run_check("""<a b='v' c="v" d=v e>""", output)
self._run_check("""<a b = 'v' c = "v" d = v e>""", output)
self._run_check("""<a\nb\n=\n'v'\nc\n=\n"v"\nd\n=\nv\ne>""", output)
self._run_check("""<a\tb\t=\t'v'\tc\t=\t"v"\td\t=\tv\te>""", output)
def check_attr_values(self):
self._run_check("""<a b='xxx\n\txxx' c="yyy\t\nyyy" d='\txyz\n'>""",
[("starttag", "a", [("b", "xxx\n\txxx"),
("c", "yyy\t\nyyy"),
("d", "\txyz\n")])
])
self._run_check("""<a b='' c="" d=>""", [
("starttag", "a", [("b", ""), ("c", ""), ("d", "")]),
])
def check_attr_entity_replacement(self):
# we expect entities *not* to be replaced by HTLMParser!
self._run_check("""<a b='&amp;&gt;&lt;&quot;&apos;'>""", [
("starttag", "a", [("b", "&><\"'")]),
])
def check_attr_funky_names(self):
self._run_check("""<a a.b='v' c:d=v e-f=v>""", [
("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")]),
])
# Support for the Zope regression test framework:
def test_suite():
suite = unittest.TestSuite()
suite.addTest(unittest.makeSuite(HTMLParserTestCase, "check_"))
return suite
if __name__ == "__main__":
errs = utils.run_suite(test_suite())
sys.exit(errs and 1 or 0)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment