Commit b8678233 authored by Nicolas Delaby's avatar Nicolas Delaby

Work around a bug in lxml where the include_meta_content_type parameter is not honoured.

This patch will be followed by a ticket addressed to the lxml maintainers.

As soon as a clean fix is released in lxml, this commit must be reverted.



git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@45422 20353a03-c40f-0410-a6d1-a30d3c3de9de
parent ea9b5682
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
from Products.PortalTransforms.interfaces import itransform from Products.PortalTransforms.interfaces import itransform
from zope.interface import implements from zope.interface import implements
from oood_commandtransform import OOOdCommandTransform, OOoDocumentDataStream from oood_commandtransform import OOOdCommandTransform, OOoDocumentDataStream
from oood_commandtransform import includeMetaContentType
from zLOG import LOG from zLOG import LOG
from lxml import etree, html from lxml import etree, html
from lxml.etree import Element, SubElement from lxml.etree import Element, SubElement
...@@ -32,6 +33,7 @@ class HTMLToOdt: ...@@ -32,6 +33,7 @@ class HTMLToOdt:
def convert(self, orig, data, cache=None, filename=None, context=None, **kwargs): def convert(self, orig, data, cache=None, filename=None, context=None, **kwargs):
# Try to recover broken HTML documents, specially regarding encoding used # Try to recover broken HTML documents, specially regarding encoding used
html_node = etree.XML(orig, parser=html_parser) html_node = etree.XML(orig, parser=html_parser)
includeMetaContentType(html_node)
orig = html.tostring(html_node, encoding='utf-8', method='xml', orig = html.tostring(html_node, encoding='utf-8', method='xml',
include_meta_content_type=True) include_meta_content_type=True)
......
...@@ -14,6 +14,7 @@ import re ...@@ -14,6 +14,7 @@ import re
from lxml import etree from lxml import etree
from lxml import html from lxml import html
from lxml.etree import ParseError, Element from lxml.etree import ParseError, Element
from lxml.etree import SubElement
from urllib import unquote from urllib import unquote
from urlparse import urlparse from urlparse import urlparse
...@@ -29,6 +30,20 @@ from Products.ERP5OOo.Document.OOoDocument import OOoServerProxy ...@@ -29,6 +30,20 @@ from Products.ERP5OOo.Document.OOoDocument import OOoServerProxy
from Products.ERP5OOo.Document.OOoDocument import enc from Products.ERP5OOo.Document.OOoDocument import enc
from Products.ERP5OOo.Document.OOoDocument import dec from Products.ERP5OOo.Document.OOoDocument import dec
def includeMetaContentType(html_node):
    """Force a UTF-8 Content-Type <meta> declaration into the document head.

    XXX Temporary workaround for lxml not honouring the
    include_meta_content_type parameter when serialising; to be removed
    once lxml is fixed.

    Every <meta> that is a direct child of <head> and whose http-equiv
    attribute equals "content-type" (compared case-insensitively) is
    removed, then a single declaration with charset=utf-8 is added.
    A missing <head> is created.  The tree is modified in place and
    nothing is returned.

    html_node -- lxml element expected to have <head> as a direct child;
                 the callers visible in this change pass the parsed
                 document root.
    """
    head = html_node.find('head')
    if head is None:
        # <head> has to come before <body>, so insert it as the first child
        # rather than appending it after the existing content.
        head = html_node.makeelement('head')
        html_node.insert(0, head)
    # XPath 1.0 has no lower-case(); translate() folds exactly the letters
    # occurring in "Content-Type" to make the comparison case-insensitive.
    stale_meta_list = head.xpath(
        'meta[translate(attribute::http-equiv, "CONTEYP", "conteyp")'
        ' = "content-type"]')
    for stale_meta in stale_meta_list:
        head.remove(stale_meta)
    # Place the declaration first in <head> so that anything sniffing the
    # encoding meets it before other content (e.g. a non-ASCII <title>).
    meta = head.makeelement('meta', {
        'http-equiv': 'Content-Type',
        'content': 'application/xhtml+xml; charset=utf-8',
    })
    head.insert(0, meta)
# Matches one leading "../" path segment.  The dots are escaped so that only
# a literal parent-directory prefix matches, not any two characters followed
# by a slash (e.g. "ab/").
CLEAN_RELATIVE_PATH = re.compile(r'^\.\./')
...@@ -194,8 +209,11 @@ class OOOdCommandTransform(commandtransform): ...@@ -194,8 +209,11 @@ class OOOdCommandTransform(commandtransform):
parent_node.append(style_node) parent_node.append(style_node)
style_node.attrib.update({'type': 'text/css'}) style_node.attrib.update({'type': 'text/css'})
parent_node.remove(css_link_tag) parent_node.remove(css_link_tag)
includeMetaContentType(xml_doc)
xml_output = html.tostring(xml_doc, encoding='utf-8', method='xml', xml_output = html.tostring(xml_doc, encoding='utf-8', method='xml',
include_meta_content_type=True) include_meta_content_type=True)
xml_output = xml_output.replace('<title/>', '<title></title>') xml_output = xml_output.replace('<title/>', '<title></title>')
return xml_output return xml_output
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment