Commit 1ca0eb84 authored by Fred Drake

Fix two buffer boundary issues; this restores the parser to passing the
test suite without any large restructuring.

Guido & I will be looking at how this is structured later; buffer
boundary checks will make this nearly unmaintainable if we can't
bring about a better structure to the code.  (Better tests would also
be nice!)
parent 148fb522
...@@ -243,6 +243,10 @@ class HTMLParser: ...@@ -243,6 +243,10 @@ class HTMLParser:
rawdata = self.rawdata rawdata = self.rawdata
j = i + 2 j = i + 2
assert rawdata[i:j] == "<!", "unexpected call to parse_declaration" assert rawdata[i:j] == "<!", "unexpected call to parse_declaration"
if rawdata[j:j+1] in ("-", ""):
# Start of comment followed by buffer boundary,
# or just a buffer boundary.
return -1
# in practice, this should look like: ((name|stringlit) S*)+ '>' # in practice, this should look like: ((name|stringlit) S*)+ '>'
n = len(rawdata) n = len(rawdata)
while j < n: while j < n:
...@@ -340,14 +344,24 @@ class HTMLParser: ...@@ -340,14 +344,24 @@ class HTMLParser:
next = rawdata[j:j+1] next = rawdata[j:j+1]
if next == ">": if next == ">":
return j + 1 return j + 1
if rawdata[j:j+2] == "/>": if next == "/":
return j + 2 s = rawdata[j:j+2]
if s == "/>":
return j + 2
if s == "/":
# buffer boundary
return -1
# else bogus input
self.updatepos(i, j + 1)
raise HTMLParseError("malformed empty start tag",
self.getpos())
if next == "": if next == "":
# end of input # end of input
return -1 return -1
if next in ("abcdefghijklmnopqrstuvwxyz=" if next in ("abcdefghijklmnopqrstuvwxyz=/"
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"): "ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
# end of input in or before attribute value # end of input in or before attribute value, or we have the
# '/' from a '/>' ending
return -1 return -1
self.updatepos(i, j) self.updatepos(i, j)
raise HTMLParseError("malformed start tag", self.getpos()) raise HTMLParseError("malformed start tag", self.getpos())
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment