Use the entitydefs initialisation logic from Python 2.6's HTMLParser, which uses unicode instead of str.

git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@42002 20353a03-c40f-0410-a6d1-a30d3c3de9de

Use the entitydefs initialisation logic from Python 2.6's HTMLParser, which uses unicode instead of str.
git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@42002 20353a03-c40f-0410-a6d1-a30d3c3de9de
8f3c4454 · Kazuhiko Shiozaki · 0f221b82 · 8f3c4454
Commit 8f3c4454 authored Jan 04, 2011 by Kazuhiko Shiozaki
Hide whitespace changes
Inline Side-by-side

Showing with 9 additions and 2 deletions

product/PortalTransforms/transforms/safe_html.py product/PortalTransforms/transforms/safe_html.py +9 -2

No files found.
--- a/product/PortalTransforms/transforms/safe_html.py
+++ b/product/PortalTransforms/transforms/safe_html.py
@@ -147,8 +147,6 @@ class StrippingParser(HTMLParser):
    Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
    """

-    from htmlentitydefs import entitydefs # replace entitydefs from sgmllib
-
    def __init__(self, valid, nasty, remove_javascript, raise_error,
                 default_encoding):
        HTMLParser.__init__( self )
@@ -180,6 +178,15 @@ class StrippingParser(HTMLParser):

    def handle_entityref(self, name):
        if self.suppress: return
+        # (begin) copied from Python-2.6's HTMLParser.py
+        # Cannot use name2codepoint directly, because HTMLParser supports apos,
+        # which is not part of HTML 4
+        if self.entitydefs is None:
+            import htmlentitydefs
+            entitydefs = HTMLParser.entitydefs = {'apos':u"'"}
+            for k, v in htmlentitydefs.name2codepoint.iteritems():
+                entitydefs[k] = unichr(v)
+        # (end) copied from Python-2.6's HTMLParser.py
        if self.entitydefs.has_key(name):
            x = ';'
        else: