From 8f3c4454ab8b79b9f6080143b4b0f0f49540726b Mon Sep 17 00:00:00 2001
From: Kazuhiko Shiozaki <kazuhiko@nexedi.com>
Date: Tue, 4 Jan 2011 16:16:19 +0000
Subject: [PATCH] use Python-2.6's HTMLParser's entitydefs initialisation logic that uses unicode instead of str.

git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@42002 20353a03-c40f-0410-a6d1-a30d3c3de9de
---
 product/PortalTransforms/transforms/safe_html.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/product/PortalTransforms/transforms/safe_html.py b/product/PortalTransforms/transforms/safe_html.py
index b22bfce5b3..594b1c2e7c 100644
--- a/product/PortalTransforms/transforms/safe_html.py
+++ b/product/PortalTransforms/transforms/safe_html.py
@@ -147,8 +147,6 @@ class StrippingParser(HTMLParser):
     Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
     """
 
-    from htmlentitydefs import entitydefs # replace entitydefs from sgmllib
-
     def __init__(self, valid, nasty, remove_javascript, raise_error,
                  default_encoding):
         HTMLParser.__init__( self )
@@ -180,6 +178,15 @@ class StrippingParser(HTMLParser):
 
     def handle_entityref(self, name):
         if self.suppress: return
+        # (begin) copied from Python-2.6's HTMLParser.py
+        # Cannot use name2codepoint directly, because HTMLParser supports apos,
+        # which is not part of HTML 4
+        if self.entitydefs is None:
+            import htmlentitydefs
+            entitydefs = HTMLParser.entitydefs = {'apos':u"'"}
+            for k, v in htmlentitydefs.name2codepoint.iteritems():
+                entitydefs[k] = unichr(v)
+        # (end) copied from Python-2.6's HTMLParser.py
         if self.entitydefs.has_key(name):
             x = ';'
         else:
-- 
2.30.9