Commit c161fc80 authored by Andreas Jung's avatar Andreas Jung

Merging

/Zope/branches/ajung-zpt-encoding-fixes

This branch fixes several encoding issues with the ZopePageTemplate
implementation, some webdav issues and now uses unicode internally
for ZPT instances (but not for the PageTemplate(File) classes)
                         
parents 687ff981 336dfdca
......@@ -39,6 +39,8 @@ Zope Changes
until the late startup phase. This in in particular useful when running
Zope behind a loadbalancer (patch by Patrick Gerken).
- the ZopePageTemplate implementation now uses unicode internally.
Bugs Fixed
- Collector #2191: extended DateTime parser for better support
......
......@@ -32,6 +32,14 @@ from zope.pagetemplate.pagetemplatefile import sniff_type
LOG = getLogger('PageTemplateFile')
def guess_type(filename, text):
# check for XML ourself since guess_content_type can't
# detect text/xml if 'filename' won't end with .xml
# XXX: fix this in zope.contenttype
if text.startswith('<?xml'):
return 'text/xml'
content_type, dummy = guess_content_type(filename, text)
if content_type in ('text/html', 'text/xml'):
return content_type
......
......@@ -40,20 +40,14 @@ from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Products.PageTemplates.PageTemplateFile import guess_type
from Products.PageTemplates.Expressions import SecureModuleImporter
# regular expression to extract the encoding from the XML preamble
encoding_reg = re.compile('<\?xml.*?encoding="(.*?)".*?\?>', re.M)
from Products.PageTemplates.utils import encodingFromXMLPreamble, charsetFromMetaEquiv
preferred_encodings = ['utf-8', 'iso-8859-15']
if os.environ.has_key('ZPT_PREFERRED_ENCODING'):
preferred_encodings.insert(0, os.environ['ZPT_PREFERRED_ENCODING'])
def sniffEncoding(text, default_encoding='utf-8'):
"""Try to determine the encoding from html or xml"""
if text.startswith('<?xml'):
mo = encoding_reg.search(text)
if mo:
return mo.group(1)
return default_encoding
class Src(Acquisition.Explicit):
""" I am scary code """
......@@ -79,7 +73,6 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable,
func_defaults = None
func_code = FuncCode((), 0)
strict = False
_default_bindings = {'name_subpath': 'traverse_subpath'}
_default_content_fn = os.path.join(package_home(globals()),
......@@ -108,22 +101,68 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable,
security.declareProtected(view_management_screens,
'read', 'ZScriptHTML_tryForm')
def __init__(self, id, text=None, content_type=None, encoding='utf-8',
strict=False):
def __init__(self, id, text=None, content_type=None, strict=True, output_encoding='utf-8'):
self.id = id
self.expand = 0
self.strict = strict
self.ZBindings_edit(self._default_bindings)
self.output_encoding = output_encoding
# default content
if not text:
text = open(self._default_content_fn).read()
encoding = 'utf-8'
content_type = 'text/html'
self.pt_edit(text, content_type, encoding)
self.pt_edit(text, content_type)
security.declareProtected(change_page_templates, 'pt_edit')
def pt_edit(self, text, content_type, encoding='utf-8'):
def pt_edit(self, text, content_type, keep_output_encoding=False):
text = text.strip()
if self.strict and not isinstance(text, unicode):
is_unicode = isinstance(text, unicode)
encoding = None
output_encoding = None
if content_type == 'text/xml':
if is_unicode:
encoding = None
output_encoding = 'utf-8'
else:
encoding = encodingFromXMLPreamble(text)
output_encoding = 'utf-8'
elif content_type == 'text/html':
charset = charsetFromMetaEquiv(text)
if is_unicode:
if charset:
encoding = None
output_encoding = charset
else:
encoding = None
output_encoding = 'iso-8859-15'
else:
if charset:
encoding = charset
output_encoding = charset
else:
encoding = 'iso-8859-15'
output_encoding = 'iso-8859-15'
else:
raise ValueError('Unsupported content-type %s' % content_type)
# for content updated through WebDAV, FTP
if not keep_output_encoding:
self.output_encoding = output_encoding
if not is_unicode:
text = unicode(text, encoding)
self.ZCacheable_invalidate()
......@@ -137,16 +176,16 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable,
source_dot_xml = Src()
security.declareProtected(change_page_templates, 'pt_editAction')
def pt_editAction(self, REQUEST, title, text, content_type, encoding, expand):
def pt_editAction(self, REQUEST, title, text, content_type, expand):
"""Change the title and document."""
if self.wl_isLocked():
raise ResourceLockedError("File is locked via WebDAV")
self.expand = expand
self.pt_setTitle(title, encoding)
self.pt_setTitle(title, self.output_encoding)
self.pt_edit(text, content_type, encoding)
self.pt_edit(text, content_type, True)
REQUEST.set('text', self.read()) # May not equal 'text'!
REQUEST.set('title', self.title)
message = "Saved changes."
......@@ -157,7 +196,7 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable,
security.declareProtected(change_page_templates, 'pt_setTitle')
def pt_setTitle(self, title, encoding='utf-8'):
if self.strict and not isinstance(title, unicode):
if not isinstance(title, unicode):
title = unicode(title, encoding)
self._setPropValue('title', title)
......@@ -186,8 +225,7 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable,
if not content_type in ('text/html', 'text/xml'):
raise ValueError('Unsupported mimetype: %s' % content_type)
encoding = sniffEncoding(text, encoding)
self.pt_edit(text, content_type, encoding)
self.pt_edit(text, content_type)
return self.pt_editForm(manage_tabs_message='Saved changes')
security.declareProtected(change_page_templates, 'pt_changePrefs')
......@@ -240,6 +278,8 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable,
return c
def write(self, text):
if not isinstance(text, unicode):
raise TypeError("'text' parameter must be unicode")
self.ZCacheable_invalidate()
ZopePageTemplate.inheritedAttribute('write')(self, text)
......@@ -291,8 +331,9 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable,
""" Handle HTTP PUT requests """
self.dav__init(REQUEST, RESPONSE)
self.dav__simpleifhandler(REQUEST, RESPONSE, refresh=1)
## XXX this should be unicode or we must pass an encoding
self.pt_edit(REQUEST.get('BODY', ''))
text = REQUEST.get('BODY', '')
content_type = guess_type('', text)
self.pt_edit(text, content_type)
RESPONSE.setStatus(204)
return RESPONSE
......@@ -303,8 +344,8 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable,
security.declareProtected(ftp_access, 'manage_FTPget')
def manage_FTPget(self):
"Get source for FTP download"
self.REQUEST.RESPONSE.setHeader('Content-Type', self.content_type)
return self.read()
result = self.pt_render()
return result.encode(self.output_encoding)
security.declareProtected(view_management_screens, 'html')
def html(self):
......@@ -353,6 +394,12 @@ class ZopePageTemplate(Script, PageTemplate, Historical, Cacheable,
# acquisition context, so we don't know where it is. :-(
return None
def pt_render(self, source=False, extra_context={}):
result = PageTemplate.pt_render(self, source, extra_context)
assert isinstance(result, unicode)
return result
def wl_isLocked(self):
return 0
......@@ -407,7 +454,7 @@ def manage_addPageTemplate(self, id, title='', text='', encoding='utf-8',
content_type = headers['content_type']
else:
content_type = guess_type(filename, text)
encoding = sniffEncoding(text, encoding)
else:
if hasattr(text, 'read'):
......@@ -418,9 +465,14 @@ def manage_addPageTemplate(self, id, title='', text='', encoding='utf-8',
content_type = headers['content_type']
else:
content_type = guess_type(filename, text)
encoding = sniffEncoding(text, encoding)
zpt = ZopePageTemplate(id, text, content_type, encoding)
# ensure that we pass unicode to the constructor to
# avoid further hassles with pt_edit()
if not isinstance(text, unicode):
text = unicode(text, encoding)
zpt = ZopePageTemplate(id, text, content_type, output_encoding=encoding)
zpt.pt_setTitle(title, encoding)
self._setObject(id, zpt)
zpt = getattr(self, id)
......
# -*- encoding: iso-8859-15 -*-
"""ZopePageTemplate regression tests.
Ensures that adding a page template works correctly.
......@@ -6,13 +8,162 @@ Note: Tests require Zope >= 2.7
"""
import unittest
import Zope2
import transaction
import zope.component.testing
from zope.traversing.adapters import DefaultTraversable
from Testing.makerequest import makerequest
from Testing.ZopeTestCase import ZopeTestCase, installProduct
from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate, manage_addPageTemplate
from Products.PageTemplates.utils import encodingFromXMLPreamble, charsetFromMetaEquiv
ascii_str = '<html><body>hello world</body></html>'
iso885915_str = '<html><body></body></html>'
utf8_str = unicode(iso885915_str, 'iso-8859-15').encode('utf-8')
xml_template = '''<?xml vesion="1.0" encoding="%s"?>
<foo>
</foo>
'''
xml_iso_8859_15 = xml_template % 'iso-8859-15'
xml_utf8 = unicode(xml_template, 'iso-8859-15').encode('utf-8') % 'utf-8'
html_template_w_header = '''
<html>
<head>
<META http-equiv="content-type" content="text/html; charset=%s">
</hed>
<body>
test
</body>
</html>
'''
html_iso_8859_15_w_header = html_template_w_header % 'iso-8859-15'
html_utf8_w_header = unicode(html_template_w_header, 'iso-8859-15').encode('utf-8') % 'utf-8'
html_template_wo_header = '''
<html>
<body>
test
</body>
</html>
'''
html_iso_8859_15_wo_header = html_template_wo_header
html_utf8_wo_header = unicode(html_template_wo_header, 'iso-8859-15').encode('utf-8')
installProduct('PageTemplates')
class ZPTUtilsTests(unittest.TestCase):
def testExtractEncodingFromXMLPreamble(self):
extract = encodingFromXMLPreamble
self.assertEqual(extract('<?xml version="1.0" ?>'), 'utf-8')
self.assertEqual(extract('<?xml encoding="utf-8" version="1.0" ?>'), 'utf-8')
self.assertEqual(extract('<?xml encoding="UTF-8" version="1.0" ?>'), 'utf-8')
self.assertEqual(extract('<?xml encoding="ISO-8859-15" version="1.0" ?>'), 'iso-8859-15')
self.assertEqual(extract('<?xml encoding="iso-8859-15" version="1.0" ?>'), 'iso-8859-15')
def testExtractCharsetFromMetaHTTPEquivTag(self):
extract = charsetFromMetaEquiv
self.assertEqual(extract('<html><META http-equiv="content-type" content="text/html; charset=UTF-8"></html>'), 'utf-8')
self.assertEqual(extract('<html><META http-equiv="content-type" content="text/html; charset=iso-8859-15"></html>'), 'iso-8859-15')
self.assertEqual(extract('<html><META http-equiv="content-type" content="text/html"></html>'), None)
self.assertEqual(extract('<html>...<html>'), None)
class ZopePageTemplateFileTests(ZopeTestCase):
def testPT_RenderWithAscii(self):
manage_addPageTemplate(self.app, 'test', text=ascii_str, encoding='ascii')
zpt = self.app['test']
result = zpt.pt_render()
# use startswith() because the renderer appends a trailing \n
self.assertEqual(result.encode('ascii').startswith(ascii_str), True)
self.assertEqual(zpt.output_encoding, 'iso-8859-15')
def testPT_RenderWithISO885915(self):
manage_addPageTemplate(self.app, 'test', text=iso885915_str, encoding='iso-8859-15')
zpt = self.app['test']
result = zpt.pt_render()
# use startswith() because the renderer appends a trailing \n
self.assertEqual(result.encode('iso-8859-15').startswith(iso885915_str), True)
self.assertEqual(zpt.output_encoding, 'iso-8859-15')
def testPT_RenderWithUTF8(self):
manage_addPageTemplate(self.app, 'test', text=utf8_str, encoding='utf-8')
zpt = self.app['test']
result = zpt.pt_render()
# use startswith() because the renderer appends a trailing \n
self.assertEqual(result.encode('utf-8').startswith(utf8_str), True)
self.assertEqual(zpt.output_encoding, 'iso-8859-15')
def testWriteAcceptsUnicode(self):
manage_addPageTemplate(self.app, 'test', '', encoding='utf-8')
zpt = self.app['test']
s = u'this is unicode'
zpt.write(s)
self.assertEqual(zpt.read(), s)
self.assertEqual(isinstance(zpt.read(), unicode), True)
def testWriteWontAcceptsNonUnicode(self):
manage_addPageTemplate(self.app, 'test', '', encoding='utf-8')
zpt = self.app['test']
self.assertRaises(TypeError, zpt.write, 'this is not unicode')
def _createZPT(self):
manage_addPageTemplate(self.app, 'test', text=utf8_str, encoding='utf-8')
zpt = self.app['test']
return zpt
def _makePUTRequest(self, body):
return {'BODY' : body}
def _put(self, text):
zpt = self._createZPT()
REQUEST = self.app.REQUEST
REQUEST.set('BODY', text)
zpt.PUT(REQUEST, REQUEST.RESPONSE)
return zpt
def testPutHTMLIso8859_15WithCharsetInfo(self):
zpt = self._put(html_iso_8859_15_w_header)
self.assertEqual(zpt.output_encoding, 'iso-8859-15')
self.assertEqual(zpt.content_type, 'text/html')
def testPutHTMLUTF8_WithCharsetInfo(self):
zpt = self._put(html_utf8_w_header)
self.assertEqual(zpt.output_encoding, 'utf-8')
self.assertEqual(zpt.content_type, 'text/html')
def testPutHTMLIso8859_15WithoutCharsetInfo(self):
zpt = self._put(html_iso_8859_15_wo_header)
self.assertEqual(zpt.output_encoding, 'iso-8859-15')
self.assertEqual(zpt.content_type, 'text/html')
def testPutHTMLUTF8_WithoutCharsetInfo(self):
zpt = self._put(html_utf8_wo_header)
self.assertEqual(zpt.output_encoding, 'iso-8859-15')
self.assertEqual(zpt.content_type, 'text/html')
def testPutXMLIso8859_15(self):
""" XML: use always UTF-8 als output encoding """
zpt = self._put(xml_iso_8859_15)
self.assertEqual(zpt.output_encoding, 'utf-8')
self.assertEqual(zpt.content_type, 'text/xml')
def testPutXMLUTF8(self):
""" XML: use always UTF-8 als output encoding """
zpt = self._put(xml_utf8)
self.assertEqual(zpt.output_encoding, 'utf-8')
self.assertEqual(zpt.content_type, 'text/xml')
class ZPTRegressions(unittest.TestCase):
......@@ -58,13 +209,6 @@ class ZPTRegressions(unittest.TestCase):
pt = self.app.pt1
self.assertEqual(pt.document_src(), self.text)
def test_BBB_for_strict_attribute(self):
# Collector 2213: old templates don't have 'strict' attribute.
from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
zpt = ZopePageTemplate('issue_2213')
del zpt.strict # simulate old templates
self.assertEqual(zpt.strict, False)
class ZPTMacros(zope.component.testing.PlacelessSetup, unittest.TestCase):
......@@ -132,7 +276,9 @@ class DummyFileUpload:
def test_suite():
suite = unittest.makeSuite(ZPTRegressions)
suite.addTests(unittest.makeSuite(ZPTUtilsTests))
suite.addTests(unittest.makeSuite(ZPTMacros))
suite.addTests(unittest.makeSuite(ZopePageTemplateFileTests))
return suite
if __name__ == '__main__':
......
##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
""" Some helper methods
$Id: ZopePageTemplate.py 71579 2006-12-17 20:26:10Z andreasjung $
"""
import re
xml_preamble_reg = re.compile(r'^<\?xml.*?encoding="(.*?)".*?\?>', re.M)
http_equiv_reg = re.compile(r'(<meta.*?http\-equiv.*?content-type.*?>)', re.I|re.M|re.S)
http_equiv_reg2 = re.compile(r'charset.*?=.*?(?P<charset>[\w\-]*)', re.I|re.M|re.S)
def encodingFromXMLPreamble(xml):
""" Extract the encoding from a xml preamble.
Return 'utf-8' if not available
"""
mo = xml_preamble_reg.match(xml)
if not mo:
return 'utf-8'
else:
return mo.group(1).lower()
def charsetFromMetaEquiv(html):
""" Return the value of the 'charset' from a html document
containing <meta http-equiv="content-type" content="text/html; charset=utf8>.
Returns None, if not available.
"""
# first check for the <meta...> tag
mo = http_equiv_reg.search(html)
if mo:
# extract the meta tag
meta = mo.group(1)
# search for the charset value
mo = http_equiv_reg2.search(meta)
if mo:
# return charset
return mo.group(1).lower()
return None
<html>
<head>
<title tal:content="template/title">The title</title>
<meta http-equiv="content-type" content="text/html;charset=utf-8">
</head>
<body>
......
<h1 tal:replace="structure python:context.manage_page_header(management_page_charset='utf-8')">Header</h1>
<h1 tal:replace="structure python:context.manage_page_header(management_page_charset=context.output_encoding)">Header</h1>
<h2 tal:define="manage_tabs_message options/manage_tabs_message | nothing"
tal:replace="structure context/manage_tabs">Tabs</h2>
......@@ -33,7 +33,7 @@
tal:content="python:context.bobobase_modification_time().strftime('%Y-%m-%d %I:%M %p')">1/1/2000
</div>
</td>
<td align="left" valign="top" colspan="2">
<td align="left" valign="top" colspan="2" rowspan="2">
<a href="source.html" tal:condition="context/html">Browse HTML source</a>
<a href="source.xml" tal:condition="not:context/html">Browse XML source</a>
<br />
......@@ -44,6 +44,17 @@
</td>
</tr>
<tr>
<td align="left" valign="middle">
<div class="form-label">Output encoding</div>
</td>
<td align="left" valign="middle">
<div class="form-text"
tal:content="context/output_encoding"
/>
</td>
</tr>
<tr tal:define="errors context/pt_errors" tal:condition="errors">
<tal:block define="global body python:context.document_src({'raw':1})" />
<td align="left" valign="middle" class="form-label">Errors</td>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment