From 97ac9e6238a6f1067698bc0378a9afde1bfbd29f Mon Sep 17 00:00:00 2001
From: Tristan Cavelier <tristan.cavelier@nexedi.com>
Date: Mon, 27 Jun 2016 17:28:53 +0200
Subject: [PATCH] erp5_web: fix some WebPage_exportAsSingleFile issues

- use `obj.data` instead of `str(obj)` to get File object content
- fix data_uri split unpack issue
- fix str/unicode encoding errors
- fix css url parsing to allow url lists
---
 .../extension.erp5.WebUtility.py           |  3 +--
 .../erp5_web/WebPage_exportAsSingleFile.py | 23 +++++++++++++------
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/bt5/erp5_web/ExtensionTemplateItem/portal_components/extension.erp5.WebUtility.py b/bt5/erp5_web/ExtensionTemplateItem/portal_components/extension.erp5.WebUtility.py
index 0f788166b7..cbb4677cc5 100644
--- a/bt5/erp5_web/ExtensionTemplateItem/portal_components/extension.erp5.WebUtility.py
+++ b/bt5/erp5_web/ExtensionTemplateItem/portal_components/extension.erp5.WebUtility.py
@@ -98,8 +98,7 @@ def partition(text, separatorRegexp):
   return result
 
 css_comment_filter_re = re.compile(r"/\*((?:[^\*]|\*[^/])*)\*/")
-#css_url_re = re.compile(r"""(:[ \t]*url\()((")([^"]*)"|(')([^']*)'|([^\)]*))\)""")
-css_url_re = re.compile(r"""(:[ \t]*url\()(\s*(")([^"]*)"\s*|\s*(')([^']*)'\s*|([^\)]*))\)""")
+css_url_re = re.compile(r"""(url\()(\s*(")([^"]*)"\s*|\s*(')([^']*)'\s*|([^\)]*))\)""")
 
 def parseCssForUrl(text):
   """ return tuple list like: [
diff --git a/bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/WebPage_exportAsSingleFile.py b/bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/WebPage_exportAsSingleFile.py
index d74198aee1..2751813997 100644
--- a/bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/WebPage_exportAsSingleFile.py
+++ b/bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/WebPage_exportAsSingleFile.py
@@ -18,15 +18,18 @@ mhtml_message = {
 }
 
 def main():
-  data = context.getTextContent("").decode("utf-8")
-  data = "".join([fn(p) for fn, p in handleHtmlPartList(parseHtml(data))])
+  data = context.getTextContent("")
+  if isinstance(data, str):
+    data = data.decode("utf-8")
+  data = u"".join([fn(p) for fn, p in handleHtmlPartList(parseHtml(data))])
+  data = data.encode("utf-8")
   if format == "mhtml":
     mhtml_message["header_dict"]["Subject"] = context.getTitle() or "Untitled"
     mhtml_message["attachment_list"].insert(0, {
       "mime_type": "text/html",
       "encode": "quoted-printable",
       "add_header_list": [("Content-Location", context.absolute_url())],
-      "data": str(data.encode("utf-8")),
+      "data": data,
     })
     res = context.Base_formatAttachmentListToMIMEMultipartString(**mhtml_message)
     if REQUEST is not None:
@@ -185,7 +188,10 @@ def handleHrefObject(obj, src, default_mimetype="text/html", default_data="<p>Li
   if hasattr(obj, "getContentType"):
     mime = obj.getContentType()
   if mime:
-    data = getattr(obj, "getData", lambda: str(obj))() or ""
+    if hasattr(obj, "data"):
+      data = obj.data or ""
+    else:
+      data = getattr(obj, "getData", lambda: str(obj))() or ""
     if isinstance(data, unicode):
       data = data.encode("utf-8")
     return handleLinkedData(mime, data, src)
@@ -260,9 +266,9 @@ def isHrefAUrl(href):
 
 normalize_kw = {"keep_empty": False, "keep_trailing_slash": False}
 def traverseHref(url, allow_hash=False):
-  url = url.split("?")[0]
+  url = url.split("?", 1)[0]
   if not allow_hash:
-    url = url.split("#")[0]
+    url = url.split("#", 1)[0]
   if url.startswith("https://") or url.startswith("http://") or url.startswith("//"): # absolute url possibly on other sites
     site_url = "/".join(url.split("/", 3)[:3])
     domain = url.split("/", 3)[2]
@@ -277,7 +283,10 @@ def traverseHref(url, allow_hash=False):
   return base_url_root_object.restrictedTraverse(str(context.Base_normalizeUrlPathname(base_url + "/" + url, **normalize_kw)[1:]))
 
 def replaceFromDataUri(data_uri, replacer):
-  header, data = data_uri.split(",")
+  split = data_uri.split(",", 1)
+  if len(split) != 2:
+    return data_uri
+  header, data = split
   if "text/css" not in header:
     return data_uri
   is_base64 = False
-- 
2.30.9
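Note on the replaceFromDataUri change above: the data URI is now split on the first comma only, because a plain-text CSS payload can itself contain commas (and a malformed URI may contain no comma at all), so the old bare two-value unpack of data_uri.split(",") raised a ValueError. Below is a minimal standalone sketch of that behaviour; the helper name and the sample URIs are illustrative only and are not part of the patched scripts.

# Illustrative sketch only, not part of the patch; extract_css_from_data_uri
# is a hypothetical name chosen for this example.
import base64

def extract_css_from_data_uri(data_uri):
  # Split on the first comma only: a plain-text CSS payload may itself
  # contain commas, and a malformed URI may contain none at all.
  split = data_uri.split(",", 1)
  if len(split) != 2:
    return None
  header, data = split
  if "text/css" not in header:
    return None
  if ";base64" in header:
    data = base64.b64decode(data)
  return data

print(extract_css_from_data_uri("data:text/css,h1, h2 { color: red }"))
print(extract_css_from_data_uri("data:text/css;base64,LyogYSwgYiAqLw=="))

In the patched script the equivalent guard simply returns the original data_uri unchanged whenever the split or the text/css check fails, so non-CSS and malformed URIs pass through untouched.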