Commit c0686e04 authored by Andreas Jung

Refactored code to extract the encoding from the XML preamble and the charset from a <meta http-equiv=...> tag.
parent 47e7f946
......@@ -16,6 +16,7 @@ from zope.traversing.adapters import DefaultTraversable
from Testing.makerequest import makerequest
from Testing.ZopeTestCase import ZopeTestCase, installProduct
from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate, manage_addPageTemplate
from Products.PageTemplates.utils import encodingFromXMLPreamble, charsetFromMetaEquiv
# Pure-ASCII template source; a render test in this module checks that the
# rendered output starts with this string.
ascii_str = '<html><body>hello world</body></html>'
......@@ -59,6 +60,23 @@ html_utf8_wo_header = unicode(html_template_wo_header, 'iso-8859-15').encode('ut
# NOTE(review): presumably makes the PageTemplates product available to the
# ZopeTestCase-based tests in this module -- confirm against ZopeTestCase docs.
installProduct('PageTemplates')
class ZPTUtilsTests(unittest.TestCase):
    # Table-driven checks for the encodingFromXMLPreamble() and
    # charsetFromMetaEquiv() helpers.

    def testExtractEncodingFromXMLPreamble(self):
        # (preamble, expected encoding) pairs; a preamble without an
        # encoding attribute is expected to yield 'utf-8'
        expectations = (
            ('<?xml version="1.0" ?>', 'utf-8'),
            ('<?xml encoding="utf-8" version="1.0" ?>', 'utf-8'),
            ('<?xml encoding="UTF-8" version="1.0" ?>', 'utf-8'),
            ('<?xml encoding="ISO-8859-15" version="1.0" ?>', 'iso-8859-15'),
            ('<?xml encoding="iso-8859-15" version="1.0" ?>', 'iso-8859-15'),
        )
        for preamble, encoding in expectations:
            self.assertEqual(encodingFromXMLPreamble(preamble), encoding)

    def testExtractCharsetFromMetaHTTPEquivTag(self):
        # (document, expected charset) pairs; None is expected when the
        # document carries no charset information
        expectations = (
            ('<html><META http-equiv="content-type" content="text/html; charset=UTF-8"></html>', 'utf-8'),
            ('<html><META http-equiv="content-type" content="text/html; charset=iso-8859-15"></html>', 'iso-8859-15'),
            ('<html><META http-equiv="content-type" content="text/html"></html>', None),
            ('<html>...<html>', None),
        )
        for document, charset in expectations:
            self.assertEqual(charsetFromMetaEquiv(document), charset)
class ZopePageTemplateFileTests(ZopeTestCase):
......@@ -67,7 +85,7 @@ class ZopePageTemplateFileTests(ZopeTestCase):
zpt = self.app['test']
result = zpt.pt_render()
# use startswith() because the renderer appends a trailing \n
self.assertEqual(result.startswith(ascii_str), True)
self.assertEqual(result.encode('ascii').startswith(ascii_str), True)
self.assertEqual(zpt.output_encoding, 'iso-8859-15')
def testPT_RenderWithISO885915(self):
......@@ -75,15 +93,16 @@ class ZopePageTemplateFileTests(ZopeTestCase):
zpt = self.app['test']
result = zpt.pt_render()
# use startswith() because the renderer appends a trailing \n
self.assertEqual(result.startswith(iso885915_str), True)
self.assertEqual(result.encode('iso-8859-15').startswith(iso885915_str), True)
self.assertEqual(zpt.output_encoding, 'iso-8859-15')
def testPT_RenderWithUTF8(self):
    # Add a template from UTF-8 encoded source, render it and check that
    # the rendered result, re-encoded as UTF-8, begins with that source.
    manage_addPageTemplate(self.app, 'test', text=utf8_str, encoding='utf-8')
    zpt = self.app['test']
    result = zpt.pt_render()
    # use startswith() because the renderer appends a trailing \n
    self.assertEqual(result.encode('utf-8').startswith(utf8_str), True)
    self.assertEqual(zpt.output_encoding, 'iso-8859-15')
def _createZPT(self):
......@@ -243,9 +262,11 @@ class DummyFileUpload:
def test_suite():
    """ Return a suite with the tests of ZPTRegressions, ZPTUtilsTests,
        ZPTMacros and ZopePageTemplateFileTests.
    """
    suite = unittest.makeSuite(ZPTRegressions)
    # add the remaining test classes to the same suite instead of
    # rebinding 'suite', which would silently drop the earlier tests
    for test_class in (ZPTUtilsTests, ZPTMacros, ZopePageTemplateFileTests):
        suite.addTests(unittest.makeSuite(test_class))
    return suite
if __name__ == '__main__':
......
##############################################################################
#
# Copyright (c) 2002 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
""" Some helper methods
$Id: ZopePageTemplate.py 71579 2006-12-17 20:26:10Z andreasjung $
"""
import re
# XML declaration at the start of the text; group 1 is the value of its
# 'encoding' pseudo-attribute.  Single or double quotes and optional
# whitespace around '=' are accepted, and the match never runs past the
# end of the declaration.
xml_preamble_reg = re.compile(
    r'^<\?xml\s[^>]*?encoding\s*=\s*["\']([^"\']*)["\'][^>]*?\?>', re.M)

# A single <meta ...> tag mentioning http-equiv and content-type; group 1 is
# the whole tag.  [^>] keeps the match inside one tag, so an unrelated
# <meta> tag earlier in the document is not swallowed into the match.
http_equiv_reg = re.compile(
    r'(<meta[^>]*?http\-equiv[^>]*?content-type[^>]*?>)', re.I|re.M|re.S)

# 'charset=<value>' inside the meta tag; group 1 (also named 'charset') is
# the value.  Whitespace around '=' and an optional opening quote are
# skipped, and the value must be non-empty.
http_equiv_reg2 = re.compile(
    r'charset\s*=\s*["\']?(?P<charset>[\w\-]+)', re.I|re.M|re.S)
def encodingFromXMLPreamble(xml):
    """ Determine the encoding declared in the XML preamble of 'xml'.

        Returns the lower-cased encoding captured by xml_preamble_reg,
        or 'utf-8' if the pattern does not match at the start of 'xml'.
    """
    match = xml_preamble_reg.match(xml)
    if match:
        return match.group(1).lower()
    # no preamble with an encoding declaration found
    return 'utf-8'
def charsetFromMetaEquiv(html):
    """ Return the value of the 'charset' from a html document containing
        <meta http-equiv="content-type" content="text/html; charset=utf-8">.

        The value is returned lower-cased.
        Returns None, if no such tag or no charset value is available.
    """
    # first check for the <meta...> tag
    meta_match = http_equiv_reg.search(html)
    if meta_match is None:
        return None

    # search for the charset value inside the extracted meta tag
    charset_match = http_equiv_reg2.search(meta_match.group(1))
    if charset_match is None:
        return None

    # Guard against an empty capture: an empty charset means "not
    # available" and must be reported as None, never as ''.
    charset = charset_match.group(1)
    if not charset:
        return None
    return charset.lower()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment