Commit fb73bb90 authored by Guido van Rossum

Change several "raise HTMLParseError" statements into assertions, as
these are really complaints about the integrity of our own code.
HTMLParseError should be raised only for invalid input.  (Should we
turn more unrecognized constructs into exceptions, e.g. '&' not
followed by an entity or character reference?)

Also added an 'r' prefix to a regex string containing a backslash.
parent 062277d2
...@@ -23,7 +23,7 @@ entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]') ...@@ -23,7 +23,7 @@ entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
charref = re.compile('&#([0-9]+)[^0-9]') charref = re.compile('&#([0-9]+)[^0-9]')
starttagopen = re.compile('<[a-zA-Z]') starttagopen = re.compile('<[a-zA-Z]')
piopen = re.compile('<\?') piopen = re.compile(r'<\?')
piclose = re.compile('>') piclose = re.compile('>')
endtagopen = re.compile('</[a-zA-Z]') endtagopen = re.compile('</[a-zA-Z]')
special = re.compile('<![^<>]*>') special = re.compile('<![^<>]*>')
...@@ -147,6 +147,7 @@ class HTMLParser: ...@@ -147,6 +147,7 @@ class HTMLParser:
if i < j: self.handle_data(rawdata[i:j]) if i < j: self.handle_data(rawdata[i:j])
i = self.updatepos(i, j) i = self.updatepos(i, j)
if i == n: break if i == n: break
assert rawdata[i] in "<&", "interesting.search() lied"
if rawdata[i] == '<': if rawdata[i] == '<':
if starttagopen.match(rawdata, i): if starttagopen.match(rawdata, i):
if self.literal: if self.literal:
...@@ -213,8 +214,6 @@ class HTMLParser: ...@@ -213,8 +214,6 @@ class HTMLParser:
k = k-1 k = k-1
i = self.updatepos(i, k) i = self.updatepos(i, k)
continue continue
else:
raise HTMLParserError('neither < nor & ??', self.getpos())
# We get here only if incomplete matches but # We get here only if incomplete matches but
# nothing else # nothing else
match = incomplete.match(rawdata, i) match = incomplete.match(rawdata, i)
...@@ -237,9 +236,7 @@ class HTMLParser: ...@@ -237,9 +236,7 @@ class HTMLParser:
# Internal -- parse comment, return length or -1 if not terminated # Internal -- parse comment, return length or -1 if not terminated
def parse_comment(self, i): def parse_comment(self, i):
rawdata = self.rawdata rawdata = self.rawdata
if rawdata[i:i+4] != '<!--': assert rawdata[i:i+4] == '<!--', 'unexpected call to parse_comment()'
raise HTMLParseError('unexpected call to parse_comment()',
self.getpos())
match = commentclose.search(rawdata, i+4) match = commentclose.search(rawdata, i+4)
if not match: if not match:
return -1 return -1
...@@ -283,9 +280,7 @@ class HTMLParser: ...@@ -283,9 +280,7 @@ class HTMLParser:
# Internal -- parse processing instr, return length or -1 if not terminated # Internal -- parse processing instr, return length or -1 if not terminated
def parse_pi(self, i): def parse_pi(self, i):
rawdata = self.rawdata rawdata = self.rawdata
if rawdata[i:i+2] != '<?': assert rawdata[i:i+2] == '<?', 'unexpected call to parse_pi()'
raise HTMLParseError('unexpected call to parse_pi()',
self.getpos())
match = piclose.search(rawdata, i+2) match = piclose.search(rawdata, i+2)
if not match: if not match:
return -1 return -1
...@@ -311,9 +306,7 @@ class HTMLParser: ...@@ -311,9 +306,7 @@ class HTMLParser:
# Now parse the data between i+1 and j into a tag and attrs # Now parse the data between i+1 and j into a tag and attrs
attrs = [] attrs = []
match = tagfind.match(rawdata, i+1) match = tagfind.match(rawdata, i+1)
if not match: assert match, 'unexpected call to parse_starttag()'
raise HTMLParseError('unexpected call to parse_starttag()',
self.getpos())
k = match.end(0) k = match.end(0)
self.lasttag = tag = string.lower(rawdata[i+1:k]) self.lasttag = tag = string.lower(rawdata[i+1:k])
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment