Commit 8cb0aa37 authored by Ivan Tyagov

Fix TextDocument class so it can work consistently with abstract Document class

git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@14907 20353a03-c40f-0410-a6d1-a30d3c3de9de
parent 6b4a09f5
...@@ -1090,7 +1090,7 @@ class Document(XMLObject, UrlMixIn, ConversionCacheMixin, SnapshotMixin): ...@@ -1090,7 +1090,7 @@ class Document(XMLObject, UrlMixIn, ConversionCacheMixin, SnapshotMixin):
else: else:
stripped_html = html stripped_html = html
# find charset and convert to utf-8 # find charset and convert to utf-8
charset_list = self.charset_parser.findall(html) charset_list = self.charset_parser.findall(str(html))
if charset_list: if charset_list:
stripped_html = unicode(stripped_html, charset_list[0]).encode('utf-8') stripped_html = unicode(stripped_html, charset_list[0]).encode('utf-8')
return stripped_html return stripped_html
......
...@@ -164,4 +164,14 @@ class TextDocument(Document, TextContent): ...@@ -164,4 +164,14 @@ class TextDocument(Document, TextContent):
base_list = re.findall(self.base_parser, str(html)) base_list = re.findall(self.base_parser, str(html))
if base_list: if base_list:
return base_list[0] return base_list[0]
return Document.getContentBaseURL(self) return Document.getContentBaseURL(self)
\ No newline at end of file
def hasBaseData(self):
  """
  Return the result of self.hasTextContent().

  This method is an override of the dynamically generated method of the
  Document class. We need to override it manually because, for backwards
  compatibility, instances of TextDocument (e.g. 'Web Page') don't use
  'base_data' to store raw data information. Instead they use 'text-content'.
  Without this override the results and logic of the abstract Document
  class are inconsistent.
  """
  return self.hasTextContent()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment