Commit 4dc97b90 authored by Evan Simpson's avatar Evan Simpson

Fix Collector #721 by preserving syntactically valid character entities in attributes.

parent 2857f5d3
......@@ -164,3 +164,24 @@ def getProgramVersion(program):
if opcode == "version":
return version
return None
import re
_ent1_re = re.compile('&(?![A-Z#])', re.I)
_entch_re = re.compile('&([A-Z][A-Z0-9]*)(?![A-Z0-9;])', re.I)
_entn1_re = re.compile('&#(?![0-9X])', re.I)
_entnx_re = re.compile('&(#X[A-F0-9]*)(?![A-F0-9;])', re.I)
_entnd_re = re.compile('&(#[0-9][0-9]*)(?![0-9;])')
del re
def attrEscape(s):
"""Replace special characters '&<>' by character entities,
except when '&' already begins a syntactically valid entity."""
s = _ent1_re.sub('&amp;', s)
s = _entch_re.sub(r'&amp;\1', s)
s = _entn1_re.sub('&amp;#', s)
s = _entnx_re.sub(r'&amp;\1', s)
s = _entnd_re.sub(r'&amp;\1', s)
s = s.replace('<', '&lt;')
s = s.replace('>', '&gt;')
s = s.replace('"', '&quot;')
return s
......@@ -162,7 +162,7 @@ class TALGenerator:
if item[1] is None:
s = item[0]
else:
s = '%s="%s"' % (item[0], cgi.escape(item[1], 1))
s = '%s="%s"' % (item[0], TALDefs.attrEscape(item[1]))
attrlist[i] = item[0], s
new.append(" " + s)
# if no non-optimizable attributes were found, convert to plain text
......
......@@ -62,31 +62,32 @@ class OutputPresentationTestCase(TestCaseBase):
attributes=", so" the="output" needs="to"
be="line" wrapped=".">
</html>''' "\n"
program, macros = self._compile(INPUT)
sio = StringIO()
interp = TALInterpreter(program, {}, DummyEngine(), sio, wrap=60)
interp()
self.assertEqual(sio.getvalue(), EXPECTED)
self.compare(INPUT, EXPECTED)
def check_unicode_content(self):
INPUT = """<p tal:content="python:u'dj-vu'">para</p>"""
EXPECTED = u"""<p>dj-vu</p>""" "\n"
program, macros = self._compile(INPUT)
sio = StringIO()
interp = TALInterpreter(program, {}, DummyEngine(), sio, wrap=60)
interp()
self.assertEqual(sio.getvalue(), EXPECTED)
self.compare(INPUT, EXPECTED)
def check_unicode_structure(self):
INPUT = """<p tal:replace="structure python:u'dj-vu'">para</p>"""
EXPECTED = u"""dj-vu""" "\n"
self.compare(INPUT, EXPECTED)
def check_entities(self):
INPUT = ('<img tal:define="foo nothing" '
'alt="&a; &#1; &#x0a; &a &#45 &; &#0a; <>" />')
EXPECTED = ('<img alt="&a; &#1; &#x0a; '
'&amp;a &amp;#45 &amp;; &amp;#0a; &lt;&gt;" />\n')
self.compare(INPUT, EXPECTED)
def compare(self, INPUT, EXPECTED):
program, macros = self._compile(INPUT)
sio = StringIO()
interp = TALInterpreter(program, {}, DummyEngine(), sio, wrap=60)
interp()
self.assertEqual(sio.getvalue(), EXPECTED)
def test_suite():
suite = unittest.TestSuite()
suite.addTest(unittest.makeSuite(MacroErrorsTestCase, "check_"))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment