Commit 8f3c4454 authored by Kazuhiko Shiozaki

Use Python 2.6's HTMLParser entitydefs initialisation logic, which uses unicode instead of str.


git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@42002 20353a03-c40f-0410-a6d1-a30d3c3de9de
parent 0f221b82
......@@ -147,8 +147,6 @@ class StrippingParser(HTMLParser):
Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
"""
from htmlentitydefs import entitydefs # replace entitydefs from sgmllib
def __init__(self, valid, nasty, remove_javascript, raise_error,
default_encoding):
HTMLParser.__init__( self )
......@@ -180,6 +178,15 @@ class StrippingParser(HTMLParser):
def handle_entityref(self, name):
if self.suppress: return
# (begin) copied from Python-2.6's HTMLParser.py
# Cannot use name2codepoint directly, because HTMLParser supports apos,
# which is not part of HTML 4
if self.entitydefs is None:
import htmlentitydefs
entitydefs = HTMLParser.entitydefs = {'apos':u"'"}
for k, v in htmlentitydefs.name2codepoint.iteritems():
entitydefs[k] = unichr(v)
# (end) copied from Python-2.6's HTMLParser.py
if self.entitydefs.has_key(name):
x = ';'
else:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment