Commit f21d7ddf authored by Jérome Perrin

web, PortalTransforms: py3

parent 03b05d22
...@@ -174,11 +174,7 @@ class WebSite(WebSection): ...@@ -174,11 +174,7 @@ class WebSite(WebSection):
if sub_path in section_dict: if sub_path in section_dict:
del section_dict[sub_path] del section_dict[sub_path]
section_list = section_dict.values()
# Sort by Index # Sort by Index
section_list.sort(key=lambda x: x.getIntIndex()) return sorted(section_dict.values(), key=lambda x: x.getIntIndex())
return section_list
else: else:
return [] return []
\ No newline at end of file
...@@ -14,7 +14,8 @@ TODO: export same components into one mhtml attachment if possible. ...@@ -14,7 +14,8 @@ TODO: export same components into one mhtml attachment if possible.
""" """
# ERP5 web uses format= argument, which is also a python builtin # ERP5 web uses format= argument, which is also a python builtin
# pylint: disable=redefined-builtin # pylint: disable=redefined-builtin
import six
from Products.PythonScripts.standard import html_quote
from zExceptions import Unauthorized from zExceptions import Unauthorized
from base64 import b64encode, b64decode from base64 import b64encode, b64decode
portal = context.getPortalObject() portal = context.getPortalObject()
...@@ -27,9 +28,10 @@ mhtml_message = { ...@@ -27,9 +28,10 @@ mhtml_message = {
} }
def main(data): def main(data):
if isinstance(data, str): if isinstance(data, bytes):
data = data.decode("utf-8") data = data.decode("utf-8")
data = u"".join([fn(p) for fn, p in handleHtmlPartList(parseHtml(data))]) data = u"".join([fn(p) for fn, p in handleHtmlPartList(parseHtml(data))])
if six.PY2:
data = data.encode("utf-8") data = data.encode("utf-8")
if format == "mhtml": if format == "mhtml":
mhtml_message["attachment_list"].insert(0, { mhtml_message["attachment_list"].insert(0, {
...@@ -75,7 +77,7 @@ def strHtmlPart(part): ...@@ -75,7 +77,7 @@ def strHtmlPart(part):
part_type = part[0] part_type = part[0]
if part_type in ("starttag", "startendtag"): if part_type in ("starttag", "startendtag"):
tag, attrs = handleHtmlTag(part[1], part[2]) tag, attrs = handleHtmlTag(part[1], part[2])
attrs_str = " ".join(["%s=\"%s\"" % (escapeHtml(k), escapeHtml(v or "")) for k, v in attrs]) attrs_str = " ".join(["%s=\"%s\"" % (html_quote(k), html_quote(v or "")) for k, v in attrs])
return "<%s%s%s>" % (tag, " " + attrs_str if attrs_str else "", " /" if part_type == "startendtag" else "") return "<%s%s%s>" % (tag, " " + attrs_str if attrs_str else "", " /" if part_type == "startendtag" else "")
if part_type == "endtag": if part_type == "endtag":
return "</%s>" % part[1] return "</%s>" % part[1]
...@@ -191,7 +193,7 @@ def handleHrefObject(obj, src, default_mimetype="text/html", default_data="<p>Li ...@@ -191,7 +193,7 @@ def handleHrefObject(obj, src, default_mimetype="text/html", default_data="<p>Li
data = str(obj.data or "") data = str(obj.data or "")
else: else:
data = getattr(obj, "getData", lambda: str(obj))() or "" data = getattr(obj, "getData", lambda: str(obj))() or ""
if isinstance(data, unicode): if six.PY2 and isinstance(data, unicode):
data = data.encode("utf-8") data = data.encode("utf-8")
return handleLinkedData(mime, data, src) return handleLinkedData(mime, data, src)
return handleLinkedData(default_mimetype, default_data, src) return handleLinkedData(default_mimetype, default_data, src)
...@@ -201,7 +203,7 @@ def handleHrefObject(obj, src, default_mimetype="text/html", default_data="<p>Li ...@@ -201,7 +203,7 @@ def handleHrefObject(obj, src, default_mimetype="text/html", default_data="<p>Li
# use the same behavior as when we call a script from browser URL bar. # use the same behavior as when we call a script from browser URL bar.
if not hasattr(obj, "getPortalType") and callable(obj): if not hasattr(obj, "getPortalType") and callable(obj):
mime, data = "text/html", obj() mime, data = "text/html", obj()
if isinstance(data, unicode): if six.PY2 and isinstance(data, unicode):
data = data.encode("utf-8") data = data.encode("utf-8")
return handleLinkedData(mime, data, src) return handleLinkedData(mime, data, src)
...@@ -270,7 +272,7 @@ def handleLinkedData(mime, data, href): ...@@ -270,7 +272,7 @@ def handleLinkedData(mime, data, href):
}) })
return url return url
else: else:
return "data:%s;base64,%s" % (mime, b64encode(data)) return "data:%s;base64,%s" % (mime, b64encode(data.encode()).decode())
def makeHrefAbsolute(href): def makeHrefAbsolute(href):
if isHrefAnAbsoluteUrl(href) or not isHrefAUrl(href): if isHrefAnAbsoluteUrl(href) or not isHrefAUrl(href):
...@@ -325,6 +327,7 @@ def replaceFromDataUri(data_uri, replacer): ...@@ -325,6 +327,7 @@ def replaceFromDataUri(data_uri, replacer):
if ";base64" in header: if ";base64" in header:
is_base64 = True is_base64 = True
data = b64decode(data) data = b64decode(data)
if not is_base64:
data = replacer(data) data = replacer(data)
return "%s,%s" % (header, b64encode(data) if is_base64 else data) return "%s,%s" % (header, b64encode(data) if is_base64 else data)
...@@ -346,9 +349,6 @@ def parseUrlSearch(search): ...@@ -346,9 +349,6 @@ def parseUrlSearch(search):
def parseHtml(text): def parseHtml(text):
return context.Base_parseHtml(text) return context.Base_parseHtml(text)
def escapeHtml(text):
return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;").replace("\"", "&quot;")
def anny(iterable, key=None): def anny(iterable, key=None):
for i in iterable: for i in iterable:
if key: if key:
......
...@@ -163,7 +163,11 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent ...@@ -163,7 +163,11 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
if mime_type == 'text/html': if mime_type == 'text/html':
mime_type = 'text/x-html-safe' mime_type = 'text/x-html-safe'
if src_mimetype != "image/svg+xml": if src_mimetype != "image/svg+xml":
result = portal_transforms.convertToData(mime_type, text_content, if six.PY2:
data = text_content
else:
data = text_content.encode()
result = portal_transforms.convertToData(mime_type, data,
object=self, context=self, object=self, context=self,
filename=filename, filename=filename,
mimetype=src_mimetype, mimetype=src_mimetype,
...@@ -373,6 +377,7 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent ...@@ -373,6 +377,7 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
text_content, content_type) text_content, content_type)
else: else:
message = 'Conversion to base format succeeds' message = 'Conversion to base format succeeds'
# TODO(zope4py3): rethink this, shouldn't we store bytes in base data ?
self._setBaseData(text_content) self._setBaseData(text_content)
self._setBaseContentType(content_type) self._setBaseContentType(content_type)
return message return message
...@@ -385,15 +390,17 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent ...@@ -385,15 +390,17 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
self._checkConversionFormatPermission(None) self._checkConversionFormatPermission(None)
if default is _MARKER: if default is _MARKER:
text_content = self._baseGetTextContent() text_content = self._baseGetTextContent()
else:
text_content = self._baseGetTextContent(default) text_content = self._baseGetTextContent(default)
if isinstance(text_content, bytes): if isinstance(text_content, bytes):
# XXX Zope4py3: should this return str ?? # TODO(Zope4py3): should this return str ??
# We probably have "legacy" documents where `text_content` is a python2 # We probably have "legacy" documents where `text_content` is a python2
# str encoded as something else than utf-8. # str encoded as something else than utf-8.
# Maybe we should introduce a new text_content_encoding property and # Maybe we should introduce a new text_content_encoding property and
# expose API to getRawTextContent (as bytes) and getTextContent would return # expose API to getRawTextContent (as bytes) and getTextContent would return
# the decoded string. # the decoded string.
# XXX what about _convertToBaseFormat/guessCharsetAndConvert ??? # XXX what about _convertToBaseFormat/guessCharsetAndConvert ???
LOG('TextDocument', WARNING, "getTextContent with bytes %s" % text_content)
try: try:
text_content = text_content.decode('utf-8') text_content = text_content.decode('utf-8')
except UnicodeDecodeError: except UnicodeDecodeError:
......
...@@ -32,6 +32,7 @@ from Products.ERP5Type.Globals import InitializeClass ...@@ -32,6 +32,7 @@ from Products.ERP5Type.Globals import InitializeClass
from Products.ERP5Type import Permissions from Products.ERP5Type import Permissions
from warnings import warn from warnings import warn
class TextConvertableMixin: class TextConvertableMixin:
""" """
This class provides a generic implementation of ITextConvertable. This class provides a generic implementation of ITextConvertable.
...@@ -46,9 +47,9 @@ class TextConvertableMixin: ...@@ -46,9 +47,9 @@ class TextConvertableMixin:
""" """
Converts the current document to plain text Converts the current document to plain text
""" """
kw.pop('format', None) kw['format'] = 'txt'
_, data = self.convert(format='txt', **kw) _, data = self.convert(**kw)
return str(data) return data
security.declareProtected(Permissions.AccessContentsInformation, security.declareProtected(Permissions.AccessContentsInformation,
'asRawText') 'asRawText')
...@@ -56,9 +57,9 @@ class TextConvertableMixin: ...@@ -56,9 +57,9 @@ class TextConvertableMixin:
""" """
Converts the current document to plain text without substitution Converts the current document to plain text without substitution
""" """
kw.pop('format', None) kw['format'] = 'txt'
_, data = self.convert(format='txt', substitute=False, **kw) kw['substitute'] = False
return str(data) return self.asText(**kw)
security.declareProtected(Permissions.AccessContentsInformation, security.declareProtected(Permissions.AccessContentsInformation,
'asTextContent') 'asTextContent')
......
...@@ -20,8 +20,15 @@ from Products.PortalTransforms.transforms.broken import BrokenTransform ...@@ -20,8 +20,15 @@ from Products.PortalTransforms.transforms.broken import BrokenTransform
def import_from_name(module_name): def import_from_name(module_name):
""" import and return a module by its name """ """import and return a module by its name"""
return __import__(module_name, {}, {}, module_name) __traceback_info__ = (module_name,)
m = __import__(module_name)
try:
for sub in module_name.split(".")[1:]:
m = getattr(m, sub)
except AttributeError as e:
raise ImportError(str(e))
return m
def make_config_persistent(kwargs): def make_config_persistent(kwargs):
""" iterates on the given dictionnary and replace list by persistent list, """ iterates on the given dictionnary and replace list by persistent list,
......
...@@ -154,7 +154,7 @@ class subprocesstransform: ...@@ -154,7 +154,7 @@ class subprocesstransform:
try: try:
if not self.useStdin: if not self.useStdin:
stdin_file = tempfile.NamedTemporaryFile() stdin_file = tempfile.NamedTemporaryFile()
stdin_file.write( data) stdin_file.write(data)
stdin_file.seek(0) stdin_file.seek(0)
command = command % {'infile': stdin_file.name} # apply tmp name to command command = command % {'infile': stdin_file.name} # apply tmp name to command
data = None data = None
......
...@@ -230,9 +230,4 @@ class IllegalHTML( ValueError ): ...@@ -230,9 +230,4 @@ class IllegalHTML( ValueError ):
# j = i + len(toHandle) # j = i + len(toHandle)
# return j # return j
# def scrubHTML( html ): from Products.PortalTransforms.transforms.safe_html import scrubHTML
# """ Strip illegal HTML tags from string text. """
# parser = StrippingParser()
# parser.feed( html )
# parser.close()
# return parser.result
...@@ -21,14 +21,14 @@ def register(): ...@@ -21,14 +21,14 @@ def register():
return unichr(result).encode('utf-8') return unichr(result).encode('utf-8')
return html_to_text("html_to_text", return html_to_text("html_to_text",
('<script [^>]>.*</script>(?im)', ' '), ('(?im)<script [^>]>.*</script>', ' '),
('<style [^>]>.*</style>(?im)', ' '), ('(?im)<style [^>]>.*</style>', ' '),
('<head [^>]>.*</head>(?im)', ' '), ('(?im)<head [^>]>.*</head>', ' '),
# added for ERP5, we want to transform <br/> in newlines # added for ERP5, we want to transform <br/> in newlines
('<br\s*/?>(?im)', '\n'), ('(?im)<br\s*/?>', '\n'),
('(?im)</?(font|em|i|strong|b)(?=\W)[^>]*>', ''), ('(?im)</?(font|em|i|strong|b)(?=\W)[^>]*>', ''),
('<[^>]*>(?i)(?m)', ' '), ('(?i)(?m)<[^>]*>', ' '),
(r'&([a-zA-Z0-9#]*?);', sub_func), (r'&([a-zA-Z0-9#]*?);', sub_func),
) )
...@@ -226,7 +226,7 @@ class StrippingParser(HTMLParser): ...@@ -226,7 +226,7 @@ class StrippingParser(HTMLParser):
def handle_data(self, data): def handle_data(self, data):
if self.suppress: return if self.suppress: return
data = html_quote(data) data = html_quote(data)
if self.original_charset and isinstance(data, str): if self.original_charset and isinstance(data, bytes):
data = data.decode(self.original_charset) data = data.decode(self.original_charset)
self.result.append(data) self.result.append(data)
...@@ -332,7 +332,7 @@ class StrippingParser(HTMLParser): ...@@ -332,7 +332,7 @@ class StrippingParser(HTMLParser):
k = len(self.rawdata) k = len(self.rawdata)
data = self.rawdata[i+9:k] data = self.rawdata[i+9:k]
j = k+3 j = k+3
if self.original_charset and isinstance(data, str): if self.original_charset and isinstance(data, bytes):
data = data.decode(self.original_charset) data = data.decode(self.original_charset)
self.result.append("<![CDATA[%s]]>" % data) self.result.append("<![CDATA[%s]]>" % data)
else: else:
...@@ -378,7 +378,7 @@ def scrubHTML(html, valid=VALID_TAGS, nasty=NASTY_TAGS, ...@@ -378,7 +378,7 @@ def scrubHTML(html, valid=VALID_TAGS, nasty=NASTY_TAGS,
parser.feed(html) parser.feed(html)
parser.close() parser.close()
result = parser.getResult() result = parser.getResult()
if parser.original_charset and isinstance(result, str): if parser.original_charset and isinstance(result, bytes):
result = result.decode(parser.original_charset).encode(default_encoding) result = result.decode(parser.original_charset).encode(default_encoding)
return result return result
......
import six
from Products.PortalTransforms.interfaces import ITransform from Products.PortalTransforms.interfaces import ITransform
from zope.interface import implementer from zope.interface import implementer
from DocumentTemplate.html_quote import html_quote from DocumentTemplate.html_quote import html_quote
...@@ -30,6 +31,7 @@ class TextPreToHTML: ...@@ -30,6 +31,7 @@ class TextPreToHTML:
raise AttributeError(attr) raise AttributeError(attr)
def convert(self, orig, data, **kwargs): def convert(self, orig, data, **kwargs):
orig = six.ensure_text(orig, errors='replace')
data.setData('<pre class="data">%s</pre>' % html_quote(orig)) data.setData('<pre class="data">%s</pre>' % html_quote(orig))
return data return data
......
import six
from Products.PortalTransforms.interfaces import ITransform from Products.PortalTransforms.interfaces import ITransform
from zope.interface import implementer from zope.interface import implementer
from DocumentTemplate.html_quote import html_quote from DocumentTemplate.html_quote import html_quote
...@@ -30,6 +31,7 @@ class TextToHTML: ...@@ -30,6 +31,7 @@ class TextToHTML:
raise AttributeError(attr) raise AttributeError(attr)
def convert(self, orig, data, **kwargs): def convert(self, orig, data, **kwargs):
orig = six.ensure_text(orig, errors='replace')
# Replaces all line breaks with a br tag, and wraps it in a p tag. # Replaces all line breaks with a br tag, and wraps it in a p tag.
data.setData('<p>%s</p>' % html_quote(orig.strip()).replace('\n', '<br />')) data.setData('<p>%s</p>' % html_quote(orig.strip()).replace('\n', '<br />'))
return data return data
......
...@@ -2,12 +2,13 @@ ...@@ -2,12 +2,13 @@
"""some common utilities """some common utilities
""" """
import io
import six import six
if six.PY2: if six.PY2:
from email import message_from_file as message_from_bytes from email import message_from_string as message_from_bytes
else: else:
from email import message_from_bytes from email import message_from_bytes
from six.moves import cStringIO as StringIO
class TransformException(Exception): class TransformException(Exception):
pass pass
...@@ -35,4 +36,6 @@ def safeToInt(value): ...@@ -35,4 +36,6 @@ def safeToInt(value):
def parseContentType(content_type): def parseContentType(content_type):
"""Parses `text/plain;charset="utf-8"` to a email.Message object""" """Parses `text/plain;charset="utf-8"` to a email.Message object"""
return message_from_bytes(StringIO("Content-Type:" + content_type.replace("\r\n", "\r\n\t"))) return message_from_bytes(
b"Content-Type:"
+ content_type.replace("\r\n", "\r\n\t").encode('utf-8'))
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment