Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
erp5
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Laurent S
erp5
Commits
e7fee233
Commit
e7fee233
authored
May 17, 2016
by
Tristan Cavelier
Committed by
Sven Franck
Jun 02, 2016
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
erp5_web: add tools to export web page as single file (mhtml or embedded html)
parent
7fd7a9eb
Changes
10
Show whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
952 additions
and
0 deletions
+952
-0
bt5/erp5_web/ExtensionTemplateItem/portal_components/extension.erp5.WebUtility.py
...mplateItem/portal_components/extension.erp5.WebUtility.py
+133
-0
bt5/erp5_web/ExtensionTemplateItem/portal_components/extension.erp5.WebUtility.xml
...plateItem/portal_components/extension.erp5.WebUtility.xml
+102
-0
bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/Base_formatAttachmentListToMIMEMultipartString.py
...rp5_web/Base_formatAttachmentListToMIMEMultipartString.py
+155
-0
bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/Base_formatAttachmentListToMIMEMultipartString.xml
...p5_web/Base_formatAttachmentListToMIMEMultipartString.xml
+62
-0
bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/Base_parseCssForUrl.xml
...emplateItem/portal_skins/erp5_web/Base_parseCssForUrl.xml
+28
-0
bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/Base_parseHtml.xml
...SkinTemplateItem/portal_skins/erp5_web/Base_parseHtml.xml
+28
-0
bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/ERP5Site_getWebSiteDomainDict.py
...em/portal_skins/erp5_web/ERP5Site_getWebSiteDomainDict.py
+8
-0
bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/ERP5Site_getWebSiteDomainDict.xml
...m/portal_skins/erp5_web/ERP5Site_getWebSiteDomainDict.xml
+62
-0
bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/WebPage_exportAsSingleFile.py
...eItem/portal_skins/erp5_web/WebPage_exportAsSingleFile.py
+312
-0
bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/WebPage_exportAsSingleFile.xml
...Item/portal_skins/erp5_web/WebPage_exportAsSingleFile.xml
+62
-0
No files found.
bt5/erp5_web/ExtensionTemplateItem/portal_components/extension.erp5.WebUtility.py
0 → 100644
View file @
e7fee233
##############################################################################
#
# Copyright (c) 2016 Nexedi SA and Contributors. All Rights Reserved.
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# garantees and support are strongly advised to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
from
HTMLParser
import
HTMLParser
class HtmlParseHelper(HTMLParser):
    """
    HTMLParser subclass that records every parsing event it receives.

    Each handler appends one tuple to `self.result`. The tuple starts with the
    event name and is followed by the handler arguments, for instance
    `handle_starttag` stores `("starttag", tag, attrs)`.
    See https://docs.python.org/2/library/htmlparser.html
    """

    def __init__(self, *args, **kw):
        HTMLParser.__init__(self, *args, **kw)
        # Recorded events, in the order the parser reported them.
        self.result = []

    def _record(self, *event):
        # `event` is already the tuple to store: (event name, arguments...).
        self.result.append(event)

    def handle_starttag(self, tag, attrs):
        self._record("starttag", tag, attrs)

    def handle_startendtag(self, tag, attrs):
        self._record("startendtag", tag, attrs)

    def handle_endtag(self, tag):
        self._record("endtag", tag)

    def handle_data(self, data):
        self._record("data", data)

    def handle_entityref(self, name):
        self._record("entityref", name)

    def handle_charref(self, name):
        self._record("charref", name)

    def handle_comment(self, data):
        self._record("comment", data)

    def handle_decl(self, decl):
        self._record("decl", decl)

    def handle_pi(self, data):
        self._record("pi", data)

    def unknown_decl(self, data):
        self._record("unknown_decl", data)
def parseHtml(text):
    """
    Feed `text` to an HtmlParseHelper and return the recorded event tuples.

    Example:
      input: 'Click <a href="destination">here</a> to see the documentation.'
      return: [
        ('data', 'Click '),
        ('starttag', 'a', [('href', 'destination')]),
        ('data', 'here'),
        ('endtag', 'a'),
        ('data', ' to see the documentation.'),
      ]
    """
    parser = HtmlParseHelper()
    parser.feed(text)
    # close() makes the parser process any data still buffered.
    parser.close()
    return parser.result
import
re
def partition(text, separatorRegexp):
    """
    Split `text` around every match of the compiled `separatorRegexp`.

    The result alternates 1-tuples holding the text between matches with
    tuples holding the whole match followed by its groups, and always starts
    and ends with a text tuple:

    partition("abcba", re.compile("(b)")) -> [
      ("a",),
      ("b", "b"),
      ("c",),
      ("b", "b"),
      ("a",),
    ]
    """
    pieces = []
    cursor = 0  # index just after the previous match
    for found in separatorRegexp.finditer(text):
        begin, finish = found.span()
        pieces.append((text[cursor:begin],))
        pieces.append((found.group(0),) + found.groups())
        cursor = finish
    # Trailing text after the last match (possibly empty).
    pieces.append((text[cursor:],))
    return pieces
# Matches one CSS comment; group 1 is the text between "/*" and "*/".
# The lazy match stops at the first "*/", so comments ending with "**/" (or
# the degenerate "/***/") are recognised and a comment never swallows the CSS
# that follows it.
css_comment_filter_re = re.compile(r"/\*(.*?)\*/", re.DOTALL)

# Matches a `url(...)` value placed directly after a colon. Groups:
#   1: the prefix up to and including "url("
#   2: everything between the parentheses (quotes and spaces included)
#   3, 4: the double quote and the double-quoted url
#   5, 6: the single quote and the single-quoted url
#   7: the unquoted url
css_url_re = re.compile(r"""(:[ \t]*url\()(\s*(")([^"]*)"\s*|\s*(')([^']*)'\s*|([^\)]*))\)""")
def parseCssForUrl(text):
    r"""
    Split a CSS text into "data", "comment" and "url" tuples.

    Comments are isolated first, then every non-comment chunk is searched for
    `url(...)` values. Concatenating the second item of every tuple gives the
    original text back. "url" tuples also carry the stripped url and the quote
    character that surrounded it ("" when unquoted).

    Example of returned list:
    [
      ("data", ""),
      ("comment", "/* set body background image */", " set body background image "),
      ("data", "\nbody {\n  background-image: url("),
      ("url", " 'http://ima.ge/bg.png' ", "http://ima.ge/bg.png", "'"),
      ("data", ");\n}\n"),
    ]
    """
    result = []
    for index, chunk in enumerate(partition(text, css_comment_filter_re)):
        if index % 2:
            # Odd positions hold the comment matches.
            result.append(("comment", chunk[0], chunk[1]))
            continue
        pending = ""
        for position, piece in enumerate(partition(chunk[0], css_url_re)):
            if position % 2 == 0:
                # Plain css between two url() values.
                pending += piece[0]
                continue
            # piece[1] is the ": url(" prefix; it stays with the data.
            result.append(("data", pending + piece[1]))
            target = (piece[4] or piece[6] or piece[7] or "").strip()
            quote = piece[3] or piece[5] or ""
            result.append(("url", piece[2], target, quote))
            # The closing parenthesis opens the next data chunk.
            pending = ")"
        result.append(("data", pending))
    return result
bt5/erp5_web/ExtensionTemplateItem/portal_components/extension.erp5.WebUtility.xml
0 → 100644
View file @
e7fee233
<?xml version="1.0"?>
<ZopeData>
<record
id=
"1"
aka=
"AAAAAAAAAAE="
>
<pickle>
<global
name=
"Extension Component"
module=
"erp5.portal_type"
/>
</pickle>
<pickle>
<dictionary>
<item>
<key>
<string>
default_reference
</string>
</key>
<value>
<string>
WebUtility
</string>
</value>
</item>
<item>
<key>
<string>
description
</string>
</key>
<value>
<none/>
</value>
</item>
<item>
<key>
<string>
id
</string>
</key>
<value>
<string>
extension.erp5.WebUtility
</string>
</value>
</item>
<item>
<key>
<string>
portal_type
</string>
</key>
<value>
<string>
Extension Component
</string>
</value>
</item>
<item>
<key>
<string>
sid
</string>
</key>
<value>
<none/>
</value>
</item>
<item>
<key>
<string>
text_content_error_message
</string>
</key>
<value>
<tuple/>
</value>
</item>
<item>
<key>
<string>
text_content_warning_message
</string>
</key>
<value>
<tuple/>
</value>
</item>
<item>
<key>
<string>
version
</string>
</key>
<value>
<string>
erp5
</string>
</value>
</item>
<item>
<key>
<string>
workflow_history
</string>
</key>
<value>
<persistent>
<string
encoding=
"base64"
>
AAAAAAAAAAI=
</string>
</persistent>
</value>
</item>
</dictionary>
</pickle>
</record>
<record
id=
"2"
aka=
"AAAAAAAAAAI="
>
<pickle>
<global
name=
"PersistentMapping"
module=
"Persistence.mapping"
/>
</pickle>
<pickle>
<dictionary>
<item>
<key>
<string>
data
</string>
</key>
<value>
<dictionary>
<item>
<key>
<string>
component_validation_workflow
</string>
</key>
<value>
<persistent>
<string
encoding=
"base64"
>
AAAAAAAAAAM=
</string>
</persistent>
</value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</pickle>
</record>
<record
id=
"3"
aka=
"AAAAAAAAAAM="
>
<pickle>
<global
name=
"WorkflowHistoryList"
module=
"Products.ERP5Type.patches.WorkflowTool"
/>
</pickle>
<pickle>
<tuple>
<none/>
<list>
<dictionary>
<item>
<key>
<string>
action
</string>
</key>
<value>
<string>
validate
</string>
</value>
</item>
<item>
<key>
<string>
validation_state
</string>
</key>
<value>
<string>
validated
</string>
</value>
</item>
</dictionary>
</list>
</tuple>
</pickle>
</record>
</ZopeData>
bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/Base_formatAttachmentListToMIMEMultipartString.py
0 → 100644
View file @
e7fee233
"""
Usage:
formatAttachmentListToMIMEMultipartString(
subtype="related",
header_dict={
"From": "<Saved by ERP5>",
"Subject": "Document Title",
},
param_list=[("type", "text/html")],
attachment_list=[
{
"mime_type": "text/html",
"charset": "utf-8",
"encode": "quoted-printable",
"header_dict": {"Content-Location": "https://www.erp5.com/My.Web.Page"}, # only add headers
"data": "<!DOCTYPE ...>.....................</...>",
},
{
"mime_type": "image/png",
"add_header_list": [("Content-Location", "https://www.erp5.com/My.Image")],
"data": "
\
x00
............
\
x01
",
}
]
);
Only the attachment_list property is mandatory.
Note: text/* content will not be automatically encoded to quoted-printable
because this encoding can lose some characters like "
\
r
" and possibly others.
Default text/* is encoded in 7or8bit.
To send specific encoded data, please make your attachment dict look like:
{
"mime_type": "text/html",
"encode": "noop",
"add_header_list": [("Content-Transfer-Encoding", "my-encoding")],
"data": encodestring(html_data),
}
"""
from
email.encoders
import
encode_noop
,
encode_7or8bit
,
\
encode_base64
as
original_encode_base64
from
email.mime.base
import
MIMEBase
from
email.mime.text
import
MIMEText
from
email.mime.image
import
MIMEImage
from
email.mime.audio
import
MIMEAudio
from
email.mime.application
import
MIMEApplication
from
email.mime.multipart
import
MIMEMultipart
import
quopri
def formatMultipartMessageToRFC2822String(msg):
    """
    Return `msg.as_string()` with CRLF line endings in every header block.

    `msg.as_string()` does not exactly follow RFC2822: its end of lines are
    LF and not CRLF. If the first line already ends with CR the string is
    returned untouched. Otherwise the main header and the header of every
    part are converted to CRLF, as are the boundary lines; part bodies are
    left as their encoder produced them.

    Note: the first space of each continuation line of a multiline header is
    replaced by a tabulation to make some mhtml viewers able to parse it, even
    if a simple space follows RFC2822.
    """
    flat = msg.as_string()  # it also forces the boundary generation
    first_line = flat.split("\n", 1)[0]
    if first_line.endswith("\r"):
        return flat
    delimiter = "--" + msg.get_boundary()
    sections = flat.split("\n" + delimiter)
    converted = []
    for position, section in enumerate(sections):
        if position == 0:
            # Everything before the first boundary is header only.
            head, rest = section, []
        else:
            # A part is "header, blank line, body"; the body is kept as is.
            pieces = section.split("\n\n", 1)
            head, rest = pieces[0], pieces[1:]
        head = head.replace("\n", "\r\n").replace("\r\n ", "\r\n\t")
        converted.append("\r\n\r\n".join([head] + rest))
    return ("\r\n" + delimiter).join(converted)
def encode_quopri(msg):
    """
    Encode the payload of `msg` as quoted-printable, in place.

    Unlike encoders.encode_quopri, the payload goes through
    `quopri.encodestring` with its default options, and the soft line breaks
    it produces ("=" followed by LF) are rewritten to end with CRLF.
    A "Content-Transfer-Encoding: quoted-printable" header is added.
    """
    raw_payload = msg.get_payload()
    encoded = quopri.encodestring(raw_payload)
    # Only soft line breaks are touched; other newlines are left unchanged.
    encoded = "=\r\n".join(encoded.split("=\n"))
    msg.set_payload(encoded)
    msg.add_header("Content-Transfer-Encoding", "quoted-printable")
def encode_base64(msg):
    """
    Base64-encode the payload of `msg` in place, with CRLF line endings.

    Delegates to email's own encode_base64 (imported as
    `original_encode_base64`) and then converts every LF of the encoded
    payload to CRLF.
    """
    original_encode_base64(msg)
    lf_payload = msg.get_payload()
    msg.set_payload("\r\n".join(lf_payload.split("\n")))
outer
=
MIMEMultipart
(
subtype
)
for
key
,
value
in
param_list
:
outer
.
set_param
(
key
,
value
)
if
boundary
is
not
None
:
outer
.
set_boundary
(
boundary
)
if
replace_header_list
is
not
None
:
for
key
,
value
in
replace_header_list
:
outer
.
replace_header
(
key
,
value
)
if
header_dict
is
not
None
:
# adds headers, does not replace or set
for
key
,
value
in
header_dict
.
items
():
outer
.
add_header
(
key
,
value
)
if
add_header_list
is
not
None
:
for
key
,
value
in
add_header_list
:
outer
.
add_header
(
key
,
value
)
for
attachment
in
attachment_list
:
mime_type
=
attachment
.
get
(
"mime_type"
,
"application/octet-stream"
)
data
=
attachment
.
get
(
"data"
,
""
)
encoding
=
attachment
.
get
(
"encode"
)
if
encoding
not
in
(
"base64"
,
"quoted-printable"
,
"7or8bit"
,
"noop"
,
None
):
raise
ValueError
(
"unknown attachment encoding %r"
%
encoding
)
main_type
,
sub_type
=
mime_type
.
split
(
"/"
)
if
encoding
is
None
:
if
main_type
==
"image"
:
if
sub_type
==
"svg+xml"
:
part
=
MIMEImage
(
data
,
sub_type
,
encode_quopri
)
# should we trust the mime_type ?
else
:
part
=
MIMEImage
(
data
,
sub_type
,
encode_base64
)
elif
main_type
==
"text"
:
part
=
MIMEText
(
data
,
sub_type
,
attachment
.
get
(
"charset"
,
"us-ascii"
))
elif
main_type
==
"audio"
:
part
=
MIMEAudio
(
data
,
sub_type
,
encode_base64
)
elif
main_type
==
"application"
:
part
=
MIMEApplication
(
data
,
sub_type
,
encode_noop
)
if
sub_type
==
"javascript"
:
encode_quopri
(
part
)
else
:
encode_base64
(
part
)
else
:
part
=
MIMEBase
(
main_type
,
sub_type
)
part
.
set_payload
(
data
)
encode_base64
(
part
)
else
:
part
=
MIMEBase
(
main_type
,
sub_type
)
part
.
set_payload
(
data
)
if
encoding
==
"base64"
:
encode_base64
(
part
)
elif
encoding
==
"quoted-printable"
:
encode_quopri
(
part
)
elif
encoding
==
"7or8bit"
:
encode_7or8bit
(
part
)
else
:
# elif encoding == "noop":
encode_noop
(
part
)
for
key
,
value
in
attachment
.
get
(
"replace_header_list"
,
[]):
part
.
replace_header
(
key
,
value
)
for
key
,
value
in
attachment
.
get
(
"header_dict"
,
{}).
items
():
# adds headers, does not replace or set
part
.
add_header
(
key
,
value
)
for
key
,
value
in
attachment
.
get
(
"add_header_list"
,
[]):
part
.
add_header
(
key
,
value
)
if
attachment
.
get
(
"filename"
,
None
)
is
not
None
:
part
.
add_header
(
"Content-Disposition"
,
"attachment"
,
attachment
[
"filename"
])
outer
.
attach
(
part
)
#return outer.as_string()
return
formatMultipartMessageToRFC2822String
(
outer
)
bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/Base_formatAttachmentListToMIMEMultipartString.xml
0 → 100644
View file @
e7fee233
<?xml version="1.0"?>
<ZopeData>
<record
id=
"1"
aka=
"AAAAAAAAAAE="
>
<pickle>
<global
name=
"PythonScript"
module=
"Products.PythonScripts.PythonScript"
/>
</pickle>
<pickle>
<dictionary>
<item>
<key>
<string>
Script_magic
</string>
</key>
<value>
<int>
3
</int>
</value>
</item>
<item>
<key>
<string>
_bind_names
</string>
</key>
<value>
<object>
<klass>
<global
name=
"NameAssignments"
module=
"Shared.DC.Scripts.Bindings"
/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key>
<string>
_asgns
</string>
</key>
<value>
<dictionary>
<item>
<key>
<string>
name_container
</string>
</key>
<value>
<string>
container
</string>
</value>
</item>
<item>
<key>
<string>
name_context
</string>
</key>
<value>
<string>
context
</string>
</value>
</item>
<item>
<key>
<string>
name_m_self
</string>
</key>
<value>
<string>
script
</string>
</value>
</item>
<item>
<key>
<string>
name_subpath
</string>
</key>
<value>
<string>
traverse_subpath
</string>
</value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key>
<string>
_params
</string>
</key>
<value>
<string>
attachment_list, subtype="mixed", header_dict=None, param_list=(), replace_header_list=None, add_header_list=None, boundary=None
</string>
</value>
</item>
<item>
<key>
<string>
id
</string>
</key>
<value>
<string>
Base_formatAttachmentListToMIMEMultipartString
</string>
</value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/Base_parseCssForUrl.xml
0 → 100644
View file @
e7fee233
<?xml version="1.0"?>
<ZopeData>
<record
id=
"1"
aka=
"AAAAAAAAAAE="
>
<pickle>
<global
name=
"ExternalMethod"
module=
"Products.ExternalMethod.ExternalMethod"
/>
</pickle>
<pickle>
<dictionary>
<item>
<key>
<string>
_function
</string>
</key>
<value>
<string>
parseCssForUrl
</string>
</value>
</item>
<item>
<key>
<string>
_module
</string>
</key>
<value>
<string>
WebUtility
</string>
</value>
</item>
<item>
<key>
<string>
id
</string>
</key>
<value>
<string>
Base_parseCssForUrl
</string>
</value>
</item>
<item>
<key>
<string>
title
</string>
</key>
<value>
<string></string>
</value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/Base_parseHtml.xml
0 → 100644
View file @
e7fee233
<?xml version="1.0"?>
<ZopeData>
<record
id=
"1"
aka=
"AAAAAAAAAAE="
>
<pickle>
<global
name=
"ExternalMethod"
module=
"Products.ExternalMethod.ExternalMethod"
/>
</pickle>
<pickle>
<dictionary>
<item>
<key>
<string>
_function
</string>
</key>
<value>
<string>
parseHtml
</string>
</value>
</item>
<item>
<key>
<string>
_module
</string>
</key>
<value>
<string>
WebUtility
</string>
</value>
</item>
<item>
<key>
<string>
id
</string>
</key>
<value>
<string>
Base_parseHtml
</string>
</value>
</item>
<item>
<key>
<string>
title
</string>
</key>
<value>
<string></string>
</value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/ERP5Site_getWebSiteDomainDict.py
0 → 100644
View file @
e7fee233
# Returns the mapping "domain name -> Web Site object".
# TODO: domain names should be exported to a web site property.
# Draft of the intended implementation, kept for reference:
# domain_dict = {}
# for web_site in portal_catalog(portal_type="Web Site", validation_state="published"):
#   domain = web_site.getDomainName("")
#   if domain != "":
#     domain_dict[domain] = web_site
# return domain_dict
# Until then no domain is known: always return an empty mapping.
return {}
bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/ERP5Site_getWebSiteDomainDict.xml
0 → 100644
View file @
e7fee233
<?xml version="1.0"?>
<ZopeData>
<record
id=
"1"
aka=
"AAAAAAAAAAE="
>
<pickle>
<global
name=
"PythonScript"
module=
"Products.PythonScripts.PythonScript"
/>
</pickle>
<pickle>
<dictionary>
<item>
<key>
<string>
Script_magic
</string>
</key>
<value>
<int>
3
</int>
</value>
</item>
<item>
<key>
<string>
_bind_names
</string>
</key>
<value>
<object>
<klass>
<global
name=
"NameAssignments"
module=
"Shared.DC.Scripts.Bindings"
/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key>
<string>
_asgns
</string>
</key>
<value>
<dictionary>
<item>
<key>
<string>
name_container
</string>
</key>
<value>
<string>
container
</string>
</value>
</item>
<item>
<key>
<string>
name_context
</string>
</key>
<value>
<string>
context
</string>
</value>
</item>
<item>
<key>
<string>
name_m_self
</string>
</key>
<value>
<string>
script
</string>
</value>
</item>
<item>
<key>
<string>
name_subpath
</string>
</key>
<value>
<string>
traverse_subpath
</string>
</value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key>
<string>
_params
</string>
</key>
<value>
<string></string>
</value>
</item>
<item>
<key>
<string>
id
</string>
</key>
<value>
<string>
ERP5Site_getWebSiteDomainDict
</string>
</value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/WebPage_exportAsSingleFile.py
0 → 100644
View file @
e7fee233
"""
Export the web page and its components to a single (m)html file.
`format` parameter could also be "mhtml".
TODO: export same components into one mhtml attachment if possible.
"""
from
zExceptions
import
Unauthorized
from
base64
import
b64encode
,
b64decode
# Portal root object; used below as the base for absolute-path hrefs.
portal = context.getPortalObject()

# Keyword arguments passed to Base_formatAttachmentListToMIMEMultipartString
# when exporting as mhtml. "attachment_list" is filled while the html is
# processed; the "Subject" header is replaced by the page title in main().
mhtml_message = {
    "subtype": "related",
    "param_list": [("type", "text/html")],
    "header_dict": {"From": "<Saved by ERP5>", "Subject": "Untitled"},
    "attachment_list": [],
}
def main():
    """
    Export the text content of `context` as one html or mhtml string.

    The html is parsed, every part is rewritten by the function paired with it
    by handleHtmlPartList, and the result is joined back. When `format` is
    "mhtml" the page is inserted as first attachment of `mhtml_message` and
    the multipart string is returned; otherwise the html itself is returned.
    When REQUEST is given, Content-Type and Content-Disposition response
    headers are set so that the result is downloaded as
    "<reference>-<version>-<language>.(m)html".
    """
    html = context.getTextContent("").decode("utf-8")
    html = "".join([render(part) for render, part in handleHtmlPartList(parseHtml(html))])
    if format == "mhtml":
        mhtml_message["header_dict"]["Subject"] = context.getTitle() or "Untitled"
        # The page itself must be the first part of the multipart message.
        mhtml_message["attachment_list"].insert(0, {
            "mime_type": "text/html",
            "encode": "quoted-printable",
            "add_header_list": [("Content-Location", context.absolute_url())],
            "data": str(html.encode("utf-8")),
        })
        result = context.Base_formatAttachmentListToMIMEMultipartString(**mhtml_message)
        content_type = "multipart/related"
        disposition = 'attachment;filename="%s-%s-%s.mhtml"'
    else:
        result = html
        content_type = "text/html"
        disposition = 'attachment;filename="%s-%s-%s.html"'
    if REQUEST is not None:
        REQUEST.RESPONSE.setHeader("Content-Type", content_type)
        # Double quotes of each file name component are backslash-escaped.
        REQUEST.RESPONSE.setHeader("Content-Disposition", disposition % (
            context.getReference("untitled").replace('"', '\\"'),
            context.getVersion("001").replace('"', '\\"'),
            context.getLanguage("en").replace('"', '\\"'),
        ))
    return result
def handleHtmlTag(tag, attrs):
    """
    Rewrite the link-carrying attributes of one html tag.

    `attrs` is a list of (name, value) pairs and is modified in place:
    - <object data>, <img src>: value handled by handleImageSource
    - <link rel="stylesheet" href>: value handled by handleCssHref, and when
      the result is a css data uri its urls are rewritten by replaceCssUrl
    - <script src>: value handled by handleJsSource
    - any other tag: "href" and "src" values are made absolute
    Returns the `(tag, attrs)` pair.
    """
    # if tag == "base" and "href" in attrs: should not exist in safe-html
    if tag == "object":
        names, convert = ("data",), handleImageSource
    elif tag == "img":
        names, convert = ("src",), handleImageSource
    elif tag == "link" and anny(attrs, key=lambda a: a[0] == "rel" and a[1] == "stylesheet"):
        names = ("href",)
        convert = lambda href: replaceFromDataUri(handleCssHref(href), replaceCssUrl)
    elif tag == "script":
        names, convert = ("src",), handleJsSource
    else:
        names, convert = ("href", "src"), makeHrefAbsolute
    for index, attr in enumerate(attrs):
        if attr[0] in names:
            attrs[index] = attr[0], convert(attr[1])
    return tag, attrs
def strHtmlPart(part):
    """
    Serialize one parsed html part (as returned by parseHtml) back to html.

    Start tags go through handleHtmlTag first, so their links are rewritten,
    and their attribute names and values are escaped with escapeHtml. Other
    parts are written back around their recorded text. Returns None for an
    unknown part type.
    """
    part_type = part[0]
    if part_type in ("starttag", "startendtag"):
        tag, attrs = handleHtmlTag(part[1], part[2])
        # An attribute without value (None) is written with an empty value.
        attrs_str = " ".join(["%s=\"%s\"" % (escapeHtml(k), escapeHtml(v or "")) for k, v in attrs])
        return "<%s%s%s>" % (
            tag,
            " " + attrs_str if attrs_str else "",
            " /" if part_type == "startendtag" else "",
        )
    if part_type == "endtag":
        return "</%s>" % part[1]
    if part_type == "data":
        return part[1]
    if part_type == "entityref":
        return "&%s;" % part[1]
    if part_type == "charref":
        return "&#%s;" % part[1]
    if part_type == "comment":
        return "<!--%s-->" % part[1]
    if part_type == "decl":
        return "<!%s>" % part[1]
    if part_type == "unknown_decl":
        # HTMLParser passes the content of a "<![...]>" marked section here,
        # without the opening "<![" nor the closing marker. SGML sections
        # (CDATA, ...) are closed by "]]>", the others ("if", "else",
        # "endif") by "]>".
        section_name = part[1].lstrip().lower()
        if section_name.startswith(("temp", "cdata", "ignore", "include", "rcdata")):
            return "<![%s]]>" % part[1]
        return "<![%s]>" % part[1]
    if part_type == "pi":
        return "<?%s>" % part[1]
# <script> elements are stripped unless the caller passes a true `allow_script`.
disallow_script = not allow_script
def handleHtmlPartList(part_list):
    """
    Pair every html part with the function that must serialize it.

    Returns a list of `(function, argument)` tuples; calling each function on
    its argument and joining the results gives the exported html.
    - When `disallow_script` is true, <script> elements are dropped with their
      content. This now includes self-closing `<script ... />` tags, which the
      parser reports as "startendtag" and which used to be kept.
    - The content of a <style> element is gathered into one string and paired
      with replaceCssUrl so that the urls it contains are rewritten.
    - Every other part is paired with strHtmlPart.
    """
    res = []
    style_data = ""
    on_script = False
    on_style = False
    for part in part_list:
        if on_script:
            # can only be data until </script> endtag; all of it is dropped
            if part[0] == "endtag" and part[1] == "script":
                on_script = False
        elif on_style:
            if part[0] == "endtag" and part[1] == "style":
                res.append((replaceCssUrl, style_data))
                res.append((strHtmlPart, part))
                style_data = ""
                on_style = False
            else:
                # can only be data until </style> endtag
                style_data += strHtmlPart(part)
        else:
            if part[0] in ("starttag", "startendtag") and part[1] == "script" and disallow_script:
                # when you save a page from a browser, every script tag is removed
                if part[0] == "starttag":
                    # Only a real start tag has content to skip.
                    on_script = True
                continue
            if part[0] == "starttag" and part[1] == "style":
                on_style = True
            res.append((strHtmlPart, part))
    return res
def handleCssHref(href):
    """Handle a stylesheet href; currently a plain delegation to handleHref."""
    return handleHref(href)
def handleJsSource(href):
    """Handle a script source; currently a plain delegation to handleHref."""
    return handleHref(href)
def handleHref(href):
    """
    Return the replacement for a linked resource reference.

    - `href` is returned unchanged when isHrefAUrl() rejects it.
    - When the target cannot be traversed (KeyError or Unauthorized), the
      href is only made absolute.
    - Otherwise the traversed object is handed to handleHrefObject.
    """
    if isHrefAUrl(href):
        try:
            target = traverseHref(href)
        except (KeyError, Unauthorized):
            return makeHrefAbsolute(href)
        return handleHrefObject(target, href)
    return href
def handleImageSource(src):
    """
    Return the replacement for an image source reference.

    - `src` is returned unchanged when isHrefAUrl() rejects it.
    - When the target cannot be traversed (KeyError or Unauthorized), the
      source is only made absolute.
    - Otherwise the traversed object is handed to handleImageSourceObject.
    """
    if isHrefAUrl(src):
        try:
            target = traverseHref(src)
        except (KeyError, Unauthorized):
            return makeHrefAbsolute(src)
        return handleImageSourceObject(target, src)
    return src
def replaceCssUrl(data):
    """
    Rewrite every `url(...)` value of the css text `data`.

    The css is split by Base_parseCssForUrl. Urls accepted by isHrefAUrl are
    replaced by the result of handleImageSource; every other part is copied
    as is. The replacement is wrapped in the quote character of the original
    value (none when it was unquoted), so a replacement containing spaces or
    parentheses stays a valid quoted url.
    """
    chunks = []
    for part in context.Base_parseCssForUrl(data):
        if part[0] == "url" and isHrefAUrl(part[2]):
            quote = part[3]  # '"', "'" or ""
            chunks.append(quote + handleImageSource(part[2]) + quote)
        else:
            # "data" and "comment" parts, and urls left untouched: raw text.
            chunks.append(part[1])
    return "".join(chunks)
def handleImageSourceObject(o, src):
    """
    Return the replacement for image source `src`, traversed to object `o`.

    When `o` has a `convert` method and the query string of `src` carries a
    "format" and/or "display" value, the object is converted with these
    parameters and the result is handed to handleLinkedData. In every other
    case handleHrefObject is used, with the "bad image" png as fallback.
    """
    if hasattr(o, "convert"):
        format_kw = {}
        for k, x in parseUrlSearch(extractUrlSearch(src)):
            # Only these two query parameters are forwarded to convert().
            if k in ("format", "display") and x is not None:
                format_kw[k] = x
        if format_kw:
            mime, data = o.convert(**format_kw)
            return handleLinkedData(mime, data, src)
    return handleHrefObject(
        o,
        src,
        default_mimetype=bad_image_mime_type,
        default_data=bad_image_data,
    )
def handleHrefObject(o, src, default_mimetype="text/html", default_data="<p>Linked page not found</p>"):
    """
    Hand the content of the traversed object `o` to handleLinkedData.

    - Objects with `getContentType` (e.g. File portal_skins/folder/file.png):
      their content type and `getData()` (or `str(o)` when there is no
      `getData`) are used; when the content type is empty the defaults are
      used instead.
    - Callable objects without `getPortalType` (e.g. Object.view): they are
      called and the result is used as "text/html".
    - Anything else: the defaults are used.
    Unicode data is encoded to utf-8 first.
    """
    # XXX handle "?portal_skin=" parameter ?
    if hasattr(o, "getContentType"):
        mime = o.getContentType("")
        if not mime:
            return handleLinkedData(default_mimetype, default_data, src)
        read = getattr(o, "getData", lambda: str(o))
        data = read() or ""
        if isinstance(data, unicode):
            data = data.encode("utf-8")
        return handleLinkedData(mime, data, src)
    # XXX handle url query parameters ? Not so easy because we need to
    # use the same behavior as when we call a script from browser URL bar.
    if hasattr(o, "getPortalType") or not callable(o):
        return handleLinkedData(default_mimetype, default_data, src)
    data = o()
    if isinstance(data, unicode):
        data = data.encode("utf-8")
    return handleLinkedData("text/html", data, src)
# Placeholder png, as a data uri, used when a linked image has no usable content.
bad_image_data_url = (
    "data:image/png;base64," +  # little image showing cannot load image
    "iVBORw0KGgoAAAANSUhEUgAAABEAAAATCAIAAAD5x3GmAAACWklEQVQokZWSX0hTcRTHz/" +
    "3TunMmMyxrQUzEPQSCFEI0fCi0HmSKdsUGg3q2h5I99dj7bE8Nw6cwLDb3kO7JP5m6h0TE" +
    "CmYQjJYgpaPc7q67+93de04P0zvnQ+CP78Pvdzgfzjnf3+GICE55+NMCACACACKOj49rmv" +
    "afvNHRUZ4/KkBEjLFQKJRTjXyRTqigUSwWI6JQKGSaJhEREQ8ApmkCgFrif+8bJ7RfMAGA" +
    "MRYMBsPhMCLWzFPUUdVI1cjjEj0usXLXdLJ6sTCx2jIBAd1otVVe11vPbKT1iqeJRMLKKp" +
    "fLVYaoChxGEAwDbt0ZsNs4ABAEbiLyoqYOEax/ZyfsYmX4q5iCAABQd1aoen3UGmDt/zod" +
    "/EWnuJczcgcIABzHu91um81W9YCI8Jga6rirqUV41O9pQqeDR6J6iRvs7VUeDFQZJCKEih" +
    "DxfINemIioq4ms7GtrwkaH4KovZ2WfujLL1/SGiIgZZSmavj2Veto0GYXO7vzawo7saztX" +
    "3JF9+bUF6Oyu8YAAtnLvNrJBAOPb7lbkizQyPZuWfX8+LeTaG00NHDe7r8Rmju0oQaawVA" +
    "Eqga+/Xkc+B1vexDSJzx+AJvEtk1FDEHjLAEXfXdt7ZgEA0H754UjH2GZgWFGR2UVFxc3A" +
    "sIh4yDDGFjPPdfxhAdea/Y87xpJy//bqnN3b05XK2r0928n55P2+w3kMw9CXmy/AE4u5Fw" +
    "h89A/tLM9d6urxTr9/G4/74zMfBvt+rsxzRKTruqIojNUsgSRJB+vrqVcv705Fc8ViqVSS" +
    "JMnpcMz5h/4B1Qxz9NOjZCgAAAAASUVORK5CYII="
)
# Raw png bytes and mime type of the placeholder above.
bad_image_data = b64decode(bad_image_data_url.split(",", 1)[1])
bad_image_mime_type = "image/png"

# Scheme of the current request with its colon (text before the first ":" of
# SERVER_URL); prepended to protocol-relative ("//host/...") hrefs.
request_protocol = context.REQUEST.SERVER_URL.split(":", 1)[0] + ":"
# Mapping "domain -> site object" used to resolve absolute urls.
site_object_dict = context.ERP5Site_getWebSiteDomainDict()
# Objects from which "/absolute/path" and "relative/path" hrefs are resolved.
base_url_root_object = portal
base_url_object = context
def handleLinkedData(mime, data, href):
    """
    Embed a linked resource and return what must replace its reference.

    - Embedded html export: returns a base64 data uri made of `mime` and
      `data`.
    - mhtml export: appends the resource to the attachment list of
      `mhtml_message` (quoted-printable for "text/*" mime types, default
      encoding otherwise) with the absolute href as Content-Location, and
      returns this absolute href.
    """
    if format != "mhtml":
        return "data:%s;base64,%s" % (mime, b64encode(data))
    url = makeHrefAbsolute(href)
    if mime.startswith("text/"):
        encode = "quoted-printable"
    else:
        # Let the attachment formatter choose from the mime type.
        encode = None
    mhtml_message["attachment_list"].append({
        "mime_type": mime,
        "encode": encode,
        "add_header_list": [("Content-Location", url)],
        "data": str(data),
    })
    return url
def makeHrefAbsolute(href):
    """
    Return `href` as an absolute url.

    Already absolute urls, and values rejected by isHrefAUrl, are returned
    unchanged. "//host/path" gets the request protocol, "/path" is appended
    to the url of `base_url_root_object`, and any other value is resolved
    from the url of `base_url_object`.
    """
    if not isHrefAnAbsoluteUrl(href) and isHrefAUrl(href):
        if href.startswith("//"):
            absolute = request_protocol + href
        elif href.startswith("/"):
            absolute = base_url_root_object.absolute_url() + href
        else:
            absolute = base_url_object.absolute_url() + "/" + href
        return absolute
    return href
def isHrefAnAbsoluteUrl(href):
    """Tell whether `href` starts with the "https://" or "http://" scheme."""
    return href.startswith(("https://", "http://"))
def isHrefAUrl(href):
    """
    Tell whether `href` is an http(s) url or a scheme-less reference.

    Returns False only when `href` starts with an alphabetic scheme other
    than http(s), i.e. letters followed by a colon ("mailto:", "data:",
    "javascript:", ...). A purely alphabetic relative path without any colon
    (e.g. "logo") is a reference, not a scheme, and returns True.
    """
    if href.startswith(("https://", "http://")):
        return True
    scheme, colon, rest = href.partition(":")
    # Without a colon there is no scheme at all, whatever the text looks like.
    return not (colon and scheme.isalpha())
def traverseHref(url, allow_hash=False):
    """
    Return the object designated by `url`, using restrictedTraverse.

    The query string is always dropped, and so is the fragment unless
    `allow_hash` is true.
    - "http(s)://domain/path" and "//domain/path": the path is traversed from
      the site object registered for the domain in `site_object_dict`; a
      KeyError is raised when the domain is unknown.
    - "/path": traversed from `base_url_root_object`.
    - anything else: traversed from `base_url_object`.
    """
    path = url.split("?")[0]
    if not allow_hash:
        path = path.split("#")[0]
    if path.startswith(("https://", "http://", "//")):
        # absolute url possibly on other sites
        pieces = path.split("/", 3)  # [scheme, "", domain, rest]
        domain = pieces[2]
        relative_path = path[len("/".join(pieces[:3])):]
        if relative_path[:1] == "/":
            relative_path = relative_path[1:]
        site_object = site_object_dict.get(domain)
        if site_object is None:
            raise KeyError(relative_path.split("/")[0])
        return site_object.restrictedTraverse(str(relative_path))
    if path.startswith("/"):
        # absolute path, relative url
        return base_url_root_object.restrictedTraverse(str(path[1:]))
    # relative url (just use a base url)
    return base_url_object.restrictedTraverse(str(path))
def replaceFromDataUri(data_uri, replacer):
    """
    Apply `replacer` to the content of a "text/css" data uri.

    `data_uri` is returned unchanged when it is not a data uri (e.g. a plain
    url, with or without commas) or when its header does not mention
    "text/css". Base64 content is decoded before `replacer` is called and
    encoded again afterwards. Only the first comma separates the header from
    the data, so unencoded css containing commas is supported.
    """
    header, comma, data = data_uri.partition(",")
    if not comma or not header.startswith("data:") or "text/css" not in header:
        return data_uri
    is_base64 = ";base64" in header
    if is_base64:
        data = b64decode(data)
    data = replacer(data)
    if is_base64:
        data = b64encode(data)
    return "%s,%s" % (header, data)
def extractUrlSearch(url):
    """
    Return the query string of `url`, leading "?" included.

    The fragment ("#...") is removed first. Returns "" when the url has no
    "?" before its fragment.
    """
    without_fragment = url.split("#", 1)[0]
    question_mark = without_fragment.find("?")
    if question_mark == -1:
        return ""
    return without_fragment[question_mark:]
def parseUrlSearch(search):
    """
    Parse a query string into a list of `(key, value)` pairs.

    A leading "?" is ignored and pairs are separated by "&". The value is the
    text after the first "=" of a pair, or None when the pair has no "=" at
    all (the former `len(k)` test was always true, so None was never
    produced). Keys and values are not url-decoded. An empty segment gives an
    `("", None)` pair.
    """
    if search[:1] == "?":
        search = search[1:]
    result = []
    for part in search.split("&"):
        key, equal_sign, value = part.partition("=")
        result.append((key, value if equal_sign else None))
    return result
def parseHtml(text):
    """Return the html parts of `text`, as parsed by the Base_parseHtml script."""
    return context.Base_parseHtml(text)
def escapeHtml(s):
    """
    Escape `s` for use in html text or in a double-quoted attribute value.

    Replaces `&`, `<`, `>` and `"` by their character entity references.
    """
    # "&" must be escaped first so that the entities added below are kept.
    return (
        s.replace("&", "&amp;")
        .replace("<", "&lt;")
        .replace(">", "&gt;")
        .replace("\"", "&quot;")
    )
def anny(iterable, key=None):
    """
    Tell whether at least one item of `iterable` is true.

    When `key` is given, it is called on every item and its result is tested
    instead of the item itself. Stops at the first true value.
    """
    for item in iterable:
        value = key(item) if key else item
        if value:
            return True
    return False
# Script entry point: the exported document built by main() is the script result.
return main()
bt5/erp5_web/SkinTemplateItem/portal_skins/erp5_web/WebPage_exportAsSingleFile.xml
0 → 100644
View file @
e7fee233
<?xml version="1.0"?>
<ZopeData>
<record
id=
"1"
aka=
"AAAAAAAAAAE="
>
<pickle>
<global
name=
"PythonScript"
module=
"Products.PythonScripts.PythonScript"
/>
</pickle>
<pickle>
<dictionary>
<item>
<key>
<string>
Script_magic
</string>
</key>
<value>
<int>
3
</int>
</value>
</item>
<item>
<key>
<string>
_bind_names
</string>
</key>
<value>
<object>
<klass>
<global
name=
"NameAssignments"
module=
"Shared.DC.Scripts.Bindings"
/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key>
<string>
_asgns
</string>
</key>
<value>
<dictionary>
<item>
<key>
<string>
name_container
</string>
</key>
<value>
<string>
container
</string>
</value>
</item>
<item>
<key>
<string>
name_context
</string>
</key>
<value>
<string>
context
</string>
</value>
</item>
<item>
<key>
<string>
name_m_self
</string>
</key>
<value>
<string>
script
</string>
</value>
</item>
<item>
<key>
<string>
name_subpath
</string>
</key>
<value>
<string>
traverse_subpath
</string>
</value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key>
<string>
_params
</string>
</key>
<value>
<string>
REQUEST=None, allow_script=False, format="embedded_html"
</string>
</value>
</item>
<item>
<key>
<string>
id
</string>
</key>
<value>
<string>
WebPage_exportAsSingleFile
</string>
</value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment