Commit 093c119b authored by Jérome Perrin

core: text document / mail message str vs bytes WIP

parent d0251c34
...@@ -115,7 +115,7 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent ...@@ -115,7 +115,7 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
text = Template(text).substitute(unicode_mapping) text = Template(text).substitute(unicode_mapping)
# If the original was a str, convert it back to str. # If the original was a str, convert it back to str.
if is_str: if six.PY2 and is_str:
text = text.encode('utf-8') text = text.encode('utf-8')
return text return text
...@@ -188,7 +188,10 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent ...@@ -188,7 +188,10 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
self.setConversion(result, original_mime_type, **kw) self.setConversion(result, original_mime_type, **kw)
else: else:
mime_type, result = self.getConversion(**kw) mime_type, result = self.getConversion(**kw)
if substitute and format in VALID_TEXT_FORMAT_LIST: if format in VALID_TEXT_FORMAT_LIST:
if six.PY3 and isinstance(result, bytes):
result = result.decode()
if substitute:
# only textual content can be substituted # only textual content can be substituted
if substitution_method_parameter_dict is None: if substitution_method_parameter_dict is None:
substitution_method_parameter_dict = {} substitution_method_parameter_dict = {}
...@@ -375,21 +378,27 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent ...@@ -375,21 +378,27 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
return message return message
security.declareProtected(Permissions.AccessContentsInformation, 'getTextContent') security.declareProtected(Permissions.AccessContentsInformation, 'getTextContent')
def getTextContent(self, default=_MARKER): def getTextContent(self, default=_MARKER, encoding=None):
"""Overriden method to check """Overriden method to check
permission to access content in raw format permission to access content in raw format and manage encoding.
""" """
self._checkConversionFormatPermission(None)
if default is _MARKER:
text_content = self._baseGetTextContent()
text_content = self._baseGetTextContent(default)
if isinstance(text_content, bytes):
# XXX Zope4py3: should this return str ?? # XXX Zope4py3: should this return str ??
# We probably have "legacy" documents where `text_content` is a python2 # We probably have "legacy" documents where `text_content` is a python2
# str encoded as something else than utf-8. # str encoded as something else than utf-8.
# Maybe we should introduce a new text_content_encoding property and # Maybe we should introduce a new text_content_encoding property and
# expose API to getRawTextContent (as bytes) and getTextContent would return # expose API to getRawTextContent (as bytes) and getTextContent would return
# the decoded string. # the decoded string.
self._checkConversionFormatPermission(None) # XXX what about _convertToBaseFormat/guessCharsetAndConvert ???
if default is _MARKER: try:
return self._baseGetTextContent() text_content = text_content.decode('utf-8')
else: except UnicodeDecodeError:
return self._baseGetTextContent(default) text_content = text_content.decode('latin1')
return text_content
# Backward compatibility for replacement of text_format by content_type # Backward compatibility for replacement of text_format by content_type
security.declareProtected(Permissions.AccessContentsInformation, 'getTextFormat') security.declareProtected(Permissions.AccessContentsInformation, 'getTextFormat')
...@@ -424,9 +433,11 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent ...@@ -424,9 +433,11 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
""" """
if not self.hasData(): if not self.hasData():
if default is _MARKER: if default is _MARKER:
return self.getTextContent() data = self._baseGetTextContent()
else: else:
return self.getTextContent(default) data = self._baseGetTextContent(default)
if not isinstance(data, bytes):
return data.encode('utf-8')
else: else:
if default is _MARKER: if default is _MARKER:
return File.getData(self) return File.getData(self)
......
...@@ -149,6 +149,7 @@ class DownloadableMixin: ...@@ -149,6 +149,7 @@ class DownloadableMixin:
RESPONSE.setHeader('Content-Length', len(data)) RESPONSE.setHeader('Content-Length', len(data))
if output_format in VALID_TEXT_FORMAT_LIST: if output_format in VALID_TEXT_FORMAT_LIST:
RESPONSE.setHeader('Content-Type', '%s; charset=utf-8' % mime) RESPONSE.setHeader('Content-Type', '%s; charset=utf-8' % mime)
data = data.encode('utf-8')
else: else:
RESPONSE.setHeader('Content-Type', mime) RESPONSE.setHeader('Content-Type', mime)
if inline is _MARKER: if inline is _MARKER:
......
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
from AccessControl import ClassSecurityInfo from AccessControl import ClassSecurityInfo
from Products.ERP5Type.Globals import InitializeClass from Products.ERP5Type.Globals import InitializeClass
from Products.ERP5Type import Permissions from Products.ERP5Type import Permissions
from Products.ERP5Type.Utils import guessEncodingFromText from Products.ERP5Type.Utils import guessEncodingFromText # TODO: guessEncodingFromBytes
from zLOG import LOG, INFO from zLOG import LOG, INFO
from email.header import decode_header, HeaderParseError from email.header import decode_header, HeaderParseError
...@@ -42,7 +42,7 @@ filename_regexp = 'name="([^"]*)"' ...@@ -42,7 +42,7 @@ filename_regexp = 'name="([^"]*)"'
def testCharsetAndConvert(text_content, content_type, encoding): def testCharsetAndConvert(text_content, content_type, encoding):
try: try:
if encoding is not None: if encoding is not None:
text_content = text_content.decode(encoding).encode('utf-8') text_content = text_content.decode(encoding)
else: else:
if six.PY2: if six.PY2:
text_content = text_content.decode().encode('utf-8') text_content = text_content.decode().encode('utf-8')
...@@ -50,8 +50,9 @@ def testCharsetAndConvert(text_content, content_type, encoding): ...@@ -50,8 +50,9 @@ def testCharsetAndConvert(text_content, content_type, encoding):
encoding = guessEncodingFromText(text_content, content_type) encoding = guessEncodingFromText(text_content, content_type)
if encoding is not None: if encoding is not None:
try: try:
text_content = text_content.decode(encoding).encode('utf-8') text_content = text_content.decode(encoding)
except (UnicodeDecodeError, LookupError): except (UnicodeDecodeError, LookupError):
# TODO: errors= repr ?
text_content = repr(text_content)[1:-1] text_content = repr(text_content)[1:-1]
else: else:
text_content = repr(text_content)[1:-1] text_content = repr(text_content)[1:-1]
...@@ -113,9 +114,6 @@ class MailMessageMixin: ...@@ -113,9 +114,6 @@ class MailMessageMixin:
""" """
Returns the content information from the header information. Returns the content information from the header information.
This is used by the metadata discovery system. This is used by the metadata discovery system.
Header information is converted in UTF-8 since this is the standard
way of representing strings in ERP5.
""" """
result = {} result = {}
for (name, value) in self._getMessage().items(): for (name, value) in self._getMessage().items():
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment