Commit 093c119b authored by Jérome Perrin

core: text document / mail message str vs bytes WIP

parent d0251c34
...@@ -115,7 +115,7 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent ...@@ -115,7 +115,7 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
text = Template(text).substitute(unicode_mapping) text = Template(text).substitute(unicode_mapping)
# If the original was a str, convert it back to str. # If the original was a str, convert it back to str.
if is_str: if six.PY2 and is_str:
text = text.encode('utf-8') text = text.encode('utf-8')
return text return text
...@@ -188,12 +188,15 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent ...@@ -188,12 +188,15 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
self.setConversion(result, original_mime_type, **kw) self.setConversion(result, original_mime_type, **kw)
else: else:
mime_type, result = self.getConversion(**kw) mime_type, result = self.getConversion(**kw)
if substitute and format in VALID_TEXT_FORMAT_LIST: if format in VALID_TEXT_FORMAT_LIST:
# only textual content can be sustituted if six.PY3 and isinstance(result, bytes):
if substitution_method_parameter_dict is None: result = result.decode()
substitution_method_parameter_dict = {} if substitute:
result = self._substituteTextContent(result, safe_substitute=safe_substitute, # only textual content can be sustituted
**substitution_method_parameter_dict) if substitution_method_parameter_dict is None:
substitution_method_parameter_dict = {}
result = self._substituteTextContent(result, safe_substitute=safe_substitute,
**substitution_method_parameter_dict)
return original_mime_type, result return original_mime_type, result
else: else:
# text_content is not set, return empty string instead of None # text_content is not set, return empty string instead of None
...@@ -375,21 +378,27 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent ...@@ -375,21 +378,27 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
return message return message
security.declareProtected(Permissions.AccessContentsInformation, 'getTextContent') security.declareProtected(Permissions.AccessContentsInformation, 'getTextContent')
def getTextContent(self, default=_MARKER): def getTextContent(self, default=_MARKER, encoding=None):
"""Overriden method to check """Overriden method to check
permission to access content in raw format permission to access content in raw format and manage encoding.
""" """
# XXX Zope4py3: should this return str ??
# We probably have "legacy" documents where `text_content` is a python2
# str encoded as something else than utf-8.
# Maybe we should introduce a new text_content_encoding property and
# expose API to getRawTextContent (as bytes) and getTextContent would return
# the decoded string.
self._checkConversionFormatPermission(None) self._checkConversionFormatPermission(None)
if default is _MARKER: if default is _MARKER:
return self._baseGetTextContent() text_content = self._baseGetTextContent()
else: text_content = self._baseGetTextContent(default)
return self._baseGetTextContent(default) if isinstance(text_content, bytes):
# XXX Zope4py3: should this return str ??
# We probably have "legacy" documents where `text_content` is a python2
# str encoded as something else than utf-8.
# Maybe we should introduce a new text_content_encoding property and
# expose API to getRawTextContent (as bytes) and getTextContent would return
# the decoded string.
# XXX what about _convertToBaseFormat/guessCharsetAndConvert ???
try:
text_content = text_content.decode('utf-8')
except UnicodeDecodeError:
text_content = text_content.decode('latin1')
return text_content
# Backward compatibility for replacement of text_format by content_type # Backward compatibility for replacement of text_format by content_type
security.declareProtected(Permissions.AccessContentsInformation, 'getTextFormat') security.declareProtected(Permissions.AccessContentsInformation, 'getTextFormat')
...@@ -424,9 +433,11 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent ...@@ -424,9 +433,11 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
""" """
if not self.hasData(): if not self.hasData():
if default is _MARKER: if default is _MARKER:
return self.getTextContent() data = self._baseGetTextContent()
else: else:
return self.getTextContent(default) data = self._baseGetTextContent(default)
if not isinstance(data, bytes):
return data.encode('utf-8')
else: else:
if default is _MARKER: if default is _MARKER:
return File.getData(self) return File.getData(self)
......
...@@ -149,6 +149,7 @@ class DownloadableMixin: ...@@ -149,6 +149,7 @@ class DownloadableMixin:
RESPONSE.setHeader('Content-Length', len(data)) RESPONSE.setHeader('Content-Length', len(data))
if output_format in VALID_TEXT_FORMAT_LIST: if output_format in VALID_TEXT_FORMAT_LIST:
RESPONSE.setHeader('Content-Type', '%s; charset=utf-8' % mime) RESPONSE.setHeader('Content-Type', '%s; charset=utf-8' % mime)
data = data.encode('utf-8')
else: else:
RESPONSE.setHeader('Content-Type', mime) RESPONSE.setHeader('Content-Type', mime)
if inline is _MARKER: if inline is _MARKER:
......
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
from AccessControl import ClassSecurityInfo from AccessControl import ClassSecurityInfo
from Products.ERP5Type.Globals import InitializeClass from Products.ERP5Type.Globals import InitializeClass
from Products.ERP5Type import Permissions from Products.ERP5Type import Permissions
from Products.ERP5Type.Utils import guessEncodingFromText from Products.ERP5Type.Utils import guessEncodingFromText # TODO: guessEncodingFromBytes
from zLOG import LOG, INFO from zLOG import LOG, INFO
from email.header import decode_header, HeaderParseError from email.header import decode_header, HeaderParseError
...@@ -42,7 +42,7 @@ filename_regexp = 'name="([^"]*)"' ...@@ -42,7 +42,7 @@ filename_regexp = 'name="([^"]*)"'
def testCharsetAndConvert(text_content, content_type, encoding): def testCharsetAndConvert(text_content, content_type, encoding):
try: try:
if encoding is not None: if encoding is not None:
text_content = text_content.decode(encoding).encode('utf-8') text_content = text_content.decode(encoding)
else: else:
if six.PY2: if six.PY2:
text_content = text_content.decode().encode('utf-8') text_content = text_content.decode().encode('utf-8')
...@@ -50,8 +50,9 @@ def testCharsetAndConvert(text_content, content_type, encoding): ...@@ -50,8 +50,9 @@ def testCharsetAndConvert(text_content, content_type, encoding):
encoding = guessEncodingFromText(text_content, content_type) encoding = guessEncodingFromText(text_content, content_type)
if encoding is not None: if encoding is not None:
try: try:
text_content = text_content.decode(encoding).encode('utf-8') text_content = text_content.decode(encoding)
except (UnicodeDecodeError, LookupError): except (UnicodeDecodeError, LookupError):
# TODO: errors= repr ?
text_content = repr(text_content)[1:-1] text_content = repr(text_content)[1:-1]
else: else:
text_content = repr(text_content)[1:-1] text_content = repr(text_content)[1:-1]
...@@ -113,9 +114,6 @@ class MailMessageMixin: ...@@ -113,9 +114,6 @@ class MailMessageMixin:
""" """
Returns the content information from the header information. Returns the content information from the header information.
This is used by the metadata discovery system. This is used by the metadata discovery system.
Header information is converted in UTF-8 since this is the standard
way of representing strings in ERP5.
""" """
result = {} result = {}
for (name, value) in self._getMessage().items(): for (name, value) in self._getMessage().items():
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment