Commit 4dc97b90 authored by Evan Simpson

Fix Collector #721 by preserving syntactically valid character entities in attributes.

parent 2857f5d3
...@@ -164,3 +164,24 @@ def getProgramVersion(program): ...@@ -164,3 +164,24 @@ def getProgramVersion(program):
if opcode == "version": if opcode == "version":
return version return version
return None return None
import re

# Matches every '&' that does NOT start a complete, well-formed reference.
# A reference is well-formed when the '&' is followed by one of
#   name;        -- an ASCII letter, then ASCII letters/digits
#   #digits;     -- decimal character reference, at least one digit
#   #xhex;       -- hex character reference, at least one hex digit
# Explicit ASCII classes are used instead of re.I so that non-ASCII letters
# which case-fold onto ASCII ones (e.g. U+017F) are never accepted as part
# of an entity name.
_bare_amp_re = re.compile(
    r'&(?!(?:[A-Za-z][A-Za-z0-9]*|#[0-9]+|#[xX][0-9A-Fa-f]+);)')
del re  # keep the module namespace free of the helper import


def attrEscape(s):
    """Escape a string for use as a double-quoted attribute value.

    The characters '<', '>' and '"' are always replaced by '&lt;',
    '&gt;' and '&quot;'.  An '&' is replaced by '&amp;' unless it already
    begins a syntactically valid entity or character reference
    ('&name;', '&#123;' or '&#x1F;'), in which case it is left untouched.

    A hex reference without digits ('&#x;') is not valid and is escaped
    like any other stray '&'.

    Returns the escaped string; an empty string is returned unchanged.
    """
    # Ampersands first: the later replacements only insert well-formed
    # references, so they can never be escaped a second time.
    s = _bare_amp_re.sub('&amp;', s)
    s = s.replace('<', '&lt;')
    s = s.replace('>', '&gt;')
    s = s.replace('"', '&quot;')
    return s
...@@ -162,7 +162,7 @@ class TALGenerator: ...@@ -162,7 +162,7 @@ class TALGenerator:
if item[1] is None: if item[1] is None:
s = item[0] s = item[0]
else: else:
s = '%s="%s"' % (item[0], cgi.escape(item[1], 1)) s = '%s="%s"' % (item[0], TALDefs.attrEscape(item[1]))
attrlist[i] = item[0], s attrlist[i] = item[0], s
new.append(" " + s) new.append(" " + s)
# if no non-optimizable attributes were found, convert to plain text # if no non-optimizable attributes were found, convert to plain text
......
...@@ -62,31 +62,32 @@ class OutputPresentationTestCase(TestCaseBase): ...@@ -62,31 +62,32 @@ class OutputPresentationTestCase(TestCaseBase):
attributes=", so" the="output" needs="to" attributes=", so" the="output" needs="to"
be="line" wrapped="."> be="line" wrapped=".">
</html>''' "\n" </html>''' "\n"
program, macros = self._compile(INPUT) self.compare(INPUT, EXPECTED)
sio = StringIO()
interp = TALInterpreter(program, {}, DummyEngine(), sio, wrap=60)
interp()
self.assertEqual(sio.getvalue(), EXPECTED)
def check_unicode_content(self): def check_unicode_content(self):
INPUT = """<p tal:content="python:u'dj-vu'">para</p>""" INPUT = """<p tal:content="python:u'dj-vu'">para</p>"""
EXPECTED = u"""<p>dj-vu</p>""" "\n" EXPECTED = u"""<p>dj-vu</p>""" "\n"
program, macros = self._compile(INPUT) self.compare(INPUT, EXPECTED)
sio = StringIO()
interp = TALInterpreter(program, {}, DummyEngine(), sio, wrap=60)
interp()
self.assertEqual(sio.getvalue(), EXPECTED)
def check_unicode_structure(self): def check_unicode_structure(self):
INPUT = """<p tal:replace="structure python:u'dj-vu'">para</p>""" INPUT = """<p tal:replace="structure python:u'dj-vu'">para</p>"""
EXPECTED = u"""dj-vu""" "\n" EXPECTED = u"""dj-vu""" "\n"
self.compare(INPUT, EXPECTED)
def check_entities(self):
    # Well-formed references in the attribute (&a; &#1; &#x0a;) are expected
    # to come through untouched, while the remaining '&' occurrences and the
    # '<' / '>' characters are expected in escaped form.
    source = (
        '<img tal:define="foo nothing" '
        'alt="&a; &#1; &#x0a; &a &#45 &; &#0a; <>" />'
    )
    expected = (
        '<img alt="&a; &#1; &#x0a; '
        '&amp;a &amp;#45 &amp;; &amp;#0a; &lt;&gt;" />\n'
    )
    self.compare(source, expected)
def compare(self, INPUT, EXPECTED):
program, macros = self._compile(INPUT) program, macros = self._compile(INPUT)
sio = StringIO() sio = StringIO()
interp = TALInterpreter(program, {}, DummyEngine(), sio, wrap=60) interp = TALInterpreter(program, {}, DummyEngine(), sio, wrap=60)
interp() interp()
self.assertEqual(sio.getvalue(), EXPECTED) self.assertEqual(sio.getvalue(), EXPECTED)
def test_suite(): def test_suite():
suite = unittest.TestSuite() suite = unittest.TestSuite()
suite.addTest(unittest.makeSuite(MacroErrorsTestCase, "check_")) suite.addTest(unittest.makeSuite(MacroErrorsTestCase, "check_"))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment