The cdata handling code for <script> and <style> could be confused by
buffer boundaries in the middle of the cdata stretch. Fixed this, and added a clear_cdata_mode() callback after a successful </endtag>. (I hope this doesn't break other things. This parser is getting horribly ad-hoc. :-( )

The cdata handling code for <script> and <style> could be confused by
buffer boundaries in the middle of the cdata stretch. Fixed this, and added a clear_cdata_mode() callback after a successful </endtag>. (I hope this doesn't break other things. This parser is getting horribly ad-hoc. :-( )
9a8fe7c4 · Guido van Rossum · 12d669c1 · 9a8fe7c4
Commit 9a8fe7c4 authored Apr 09, 2001 by Guido van Rossum
Hide whitespace changes
Inline Side-by-side

Showing with 6 additions and 2 deletions

lib/python/TAL/HTMLParser.py lib/python/TAL/HTMLParser.py +6 -2

No files found.
--- a/lib/python/TAL/HTMLParser.py
+++ b/lib/python/TAL/HTMLParser.py
@@ -14,7 +14,7 @@ import string
 # Regular expressions used for parsing

 interesting_normal = re.compile('[&<]')
-interesting_cdata = re.compile('</')
+interesting_cdata = re.compile(r'<(/|\Z)')
 incomplete = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*|#[0-9]*)?')

 entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
@@ -144,6 +144,9 @@ class HTMLParser:
    def set_cdata_mode(self):
        self.interesting = interesting_cdata

+    def clear_cdata_mode(self):
+        self.interesting = interesting_normal
+
    # Internal -- handle data as far as reasonable.  May leave state
    # and data to be processed by a subsequent call.  If 'end' is
    # true, force handling all data as if followed by EOF marker.
@@ -155,7 +158,6 @@ class HTMLParser:
            match = self.interesting.search(rawdata, i) # < or &
            if match:
                j = match.start()
-                self.interesting = interesting_normal
            else:
                j = n
            if i < j: self.handle_data(rawdata[i:j])
@@ -166,6 +168,8 @@ class HTMLParser:
                    k = self.parse_starttag(i)
                elif endtagopen.match(rawdata, i): # </
                    k = self.parse_endtag(i)
+                    if k >= 0:
+                        self.clear_cdata_mode()
                elif commentopen.match(rawdata, i): # <!--
                    k = self.parse_comment(i)
                elif piopen.match(rawdata, i): # <?