From 8f3c4454ab8b79b9f6080143b4b0f0f49540726b Mon Sep 17 00:00:00 2001
From: Kazuhiko Shiozaki <kazuhiko@nexedi.com>
Date: Tue, 4 Jan 2011 16:16:19 +0000
Subject: [PATCH] use Python-2.6's HTMLParser's entitydefs initialisation logic
 that uses unicode instead of str.

git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@42002 20353a03-c40f-0410-a6d1-a30d3c3de9de
---
 product/PortalTransforms/transforms/safe_html.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/product/PortalTransforms/transforms/safe_html.py b/product/PortalTransforms/transforms/safe_html.py
index b22bfce5b3..594b1c2e7c 100644
--- a/product/PortalTransforms/transforms/safe_html.py
+++ b/product/PortalTransforms/transforms/safe_html.py
@@ -147,8 +147,6 @@ class StrippingParser(HTMLParser):
     Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
     """
 
-    from htmlentitydefs import entitydefs # replace entitydefs from sgmllib
-
     def __init__(self, valid, nasty, remove_javascript, raise_error,
                  default_encoding):
         HTMLParser.__init__( self )
@@ -180,6 +178,15 @@ class StrippingParser(HTMLParser):
 
     def handle_entityref(self, name):
         if self.suppress: return
+        # (begin) copied from Python-2.6's HTMLParser.py
+        # Cannot use name2codepoint directly, because HTMLParser supports apos,
+        # which is not part of HTML 4
+        if self.entitydefs is None:
+            import htmlentitydefs
+            entitydefs = HTMLParser.entitydefs = {'apos':u"'"}
+            for k, v in htmlentitydefs.name2codepoint.iteritems():
+                entitydefs[k] = unichr(v)
+        # (end) copied from Python-2.6's HTMLParser.py
         if self.entitydefs.has_key(name):
             x = ';'
         else:
-- 
2.30.9