oood_commandtransform.py 7.49 KB
Newer Older
1
# -*- coding: utf-8 -*-
2 3 4
from Products.PortalTransforms.libtransforms.commandtransform import commandtransform
from Products.PortalTransforms.interfaces import idatastream
from Products.ERP5Type.Document import newTempOOoDocument
5
from Products.ERP5.Document.Document import ConversionError
6
from Products.CMFCore.utils import getToolByName
7
from Acquisition import aq_base
8
from zope.interface import implements
9 10 11
from OFS.Image import Image as OFSImage
from zLOG import LOG

12 13 14 15 16
from Products.ERP5OOo.OOoUtils import OOoBuilder
import re
from lxml import etree
from lxml.etree import ParseError, Element

17 18 19 20 21 22 23
from urllib import unquote
from urlparse import urlparse
try:
  # Python >= 2.6
  from urlparse import parse_qsl
except ImportError:
  from cgi import parse_qsl
24

25
# Matches one literal leading "../" path segment. The dots are escaped so
# that an arbitrary two-character first segment (e.g. "ab/") is left alone.
CLEAN_RELATIVE_PATH = re.compile(r'^\.\./')
26

27 28
class OOoDocumentDataStream:
  """Data stream used to carry OOoDocument content in Portal Transforms

  Only the main data and the cacheable flag are stored; sub objects and
  metadata are not kept.
  """
  implements(idatastream)

  def getData(self):
    """Return the value previously given to setData"""
    return self.value

  def setData(self, value):
    """Store value as the main data of the stream"""
    self.value = value

  def getSubObjects(self):
    """Return a new empty dict: sub objects are not stored"""
    return dict()

  def setSubObjects(self, objects):
    """Accept objects and ignore them: sub objects are not stored"""

  def getMetadata(self):
    """Return a new empty dict on every call

    Nothing is stored on the stream, so changes made to the returned
    dict are not kept.
    """
    return dict()

  def setCachable(self, value):
    """Record the flag returned by isCacheable"""
    self._is_cacheable = value

  def isCacheable(self):
    """Return the flag recorded by setCachable, True if never set"""
    try:
      return self._is_cacheable
    except AttributeError:
      return True

class OOOdCommandTransform(commandtransform):
  """Transformer using oood

  convert() loads the data into a temporary OOoDocument and convertTo()
  asks that document for the target format. For 'text/html' data,
  external stylesheets are inlined before conversion and linked images are
  stored into the resulting archive after conversion.
  """

  def __init__(self, context, name, data, mimetype):
    """
    - context: object used to traverse to linked resources and to reach
               the portal
    - name: name of the transform
    - data: content to convert
    - mimetype: mimetype of data
    """
    commandtransform.__init__(self, name)
    if name:
      self.__name__ = name
    self.mimetype = mimetype
    self.context = context
    if self.mimetype == 'text/html':
      # Inline stylesheets now, so the stored data is self-contained
      data = self.includeExternalCssList(data)
    self.data = data

  def name(self):
    """Return the name of this transform"""
    return self.__name__

  def _getObjectByPath(self, path):
    """Traverse path from the context, return None if it can not be reached

    AttributeError and KeyError raised by the traversal are taken to mean
    that the resource is not hosted by ZODB.
    """
    try:
      return self.context.restrictedTraverse(path)
    except (AttributeError, KeyError):
      return None

  def includeImageList(self, data):
    """Include Images in ODF archive

    Every draw:image of content.xml whose link can be traversed from the
    context is stored into the archive: the link is rewritten to the path
    returned by the builder and the parent frame gets the image size.
    Images without link, or which can not be traversed, are left untouched.

    - data: zipped archive content

    Return the rendered archive.
    """
    builder = OOoBuilder(data)
    content = builder.extract('content.xml')
    xml_doc = etree.XML(content)
    image_tag_list = xml_doc.xpath('//*[name() = "draw:image"]')
    SVG_NAMESPACE = 'urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0'
    XLINK_NAMESPACE = 'http://www.w3.org/1999/xlink'
    # centimeters per pixel, i.e. 100 pixels per inch
    ratio_px_cm = 2.54 / 100.
    # Flag to enable modification of OOoBuilder
    odt_content_modified = False
    for image_tag in image_tag_list:
      frame = image_tag.getparent()
      #Try to get image file from ZODB
      href_attribute_list = image_tag.xpath('.//@*[name() = "xlink:href"]')
      if not href_attribute_list:
        # No link on this image, nothing to fetch
        continue
      url = href_attribute_list[0]
      parse_result = urlparse(unquote(url))
      # urlparse return a 6-tuple: scheme, netloc, path, params, query, fragment
      path = parse_result[2]
      if not path:
        continue
      # OOo corrupt relative Links inside HTML content during odt conversion
      # <img src="REF.TO.IMAGE" ... /> become <draw:image xlink:href="../REF.TO.IMAGE" ... />
      # So remove "../" added by OOo
      path = CLEAN_RELATIVE_PATH.sub('', path)
      # retrieve http parameters and use them to convert image
      image_parameter_dict = dict(parse_qsl(parse_result[4]))
      image = self._getObjectByPath(path)
      if image is None:
        #Image not found, this image is probably not hosted by ZODB. Do nothing
        continue
      odt_content_modified = True
      content_type = image.getContentType()
      mimetype_list = getToolByName(self.context.getPortalObject(),
                                 'mimetypes_registry').lookup(content_type)

      # 'format' is always removed from the conversion parameters; it is
      # only used as image format when the registry gives no answer.
      image_format = image_parameter_dict.pop('format', None)
      for mimetype_object in mimetype_list:
        if mimetype_object.extensions:
          image_format = mimetype_object.extensions[0]
          break
        elif mimetype_object.globs:
          # globs is a sequence of patterns like '*.png': use the first one
          image_format = mimetype_object.globs[0].strip('*.')
          break
      if getattr(image, 'meta_type', None) == 'ERP5 Image':
        #ERP5 API
        # resize image according parameters
        mime, image_data = image.convert(None, **image_parameter_dict)
        image = OFSImage(image.getId(), image.getTitle(), image_data)

      # image should be OFSImage
      image_content = image.data
      width = image.width
      height = image.height
      if height:
        frame.set('{%s}height' % SVG_NAMESPACE,
                  '%.3fcm' % (height * ratio_px_cm))
      if width:
        frame.set('{%s}width' % SVG_NAMESPACE,
                  '%.3fcm' % (width * ratio_px_cm))
      new_path = builder.addImage(image_content, format=image_format)
      image_tag.set('{%s}href' % XLINK_NAMESPACE, new_path)
    if odt_content_modified:
      builder.replace('content.xml', etree.tostring(xml_doc, encoding='utf-8',
                                                    xml_declaration=True,
                                                    pretty_print=False))
    return builder.render()

  def includeExternalCssList(self, data):
    """Replace external Css link by style Element,
    to avoid ooo querying portal without credentials through http.

    - data: html content

    Return data unchanged if it can not be parsed as XML, otherwise the
    serialised document (utf-8, without XML declaration).
    """
    try:
      xml_doc = etree.XML(data)
    except ParseError:
      #If not valid xhtml do nothing
      return data
    xpath = '//*[local-name() = "link"][@type = "text/css"]'
    css_link_tag_list = xml_doc.xpath(xpath)
    for css_link_tag in css_link_tag_list:
      #Try to get css from ZODB
      href_attribute_list = css_link_tag.xpath('.//@href')
      if not href_attribute_list:
        # No href on this link, nothing to fetch
        continue
      url = href_attribute_list[0]
      parse_result = urlparse(unquote(url))
      # urlparse return a 6-tuple: scheme, netloc, path, params, query, fragment
      path = parse_result[2]
      if not path:
        continue
      css_object = self._getObjectByPath(path)
      if css_object is None:
        #Css not found, it is probably not hosted by ZODB. Do nothing
        continue
      if callable(aq_base(css_object)):
        #In case of DTMLDocument
        css_as_text = css_object(client=self.context.getPortalObject())
      else:
        #Other cases like files
        css_as_text = str(css_object)
      # The style element is appended at the end of the parent of the link,
      # then the link itself is dropped
      parent_node = css_link_tag.getparent()
      style_node = Element('style')
      style_node.text = css_as_text
      parent_node.append(style_node)
      style_node.attrib.update({'type': 'text/css'})
      parent_node.remove(css_link_tag)
    return etree.tostring(xml_doc, encoding='utf-8',
                          xml_declaration=False, pretty_print=False, )

  def convert(self):
    """Load the data into a temporary OOoDocument, kept as self.ooo

    Must be called before convertTo, which reads self.ooo.
    """
    # NOTE(review): name is a method of this class, so self.name is a bound
    # method here and below, not the name string -- confirm whether
    # self.name() is intended before changing it.
    tmp_ooo = newTempOOoDocument(self.context, self.name)
    # XXX We store the same content inside data and base_data
    # otherwise conversion server fails to convert html=>odt for example.
    # deeper investigation is required inside oood to understand this issue.
    tmp_ooo.edit( base_data=self.data,
                  fname=self.name,
                  source_reference=self.name,
                  base_content_type=self.mimetype,
                  content_type=self.mimetype,)
    self.ooo = tmp_ooo

  def convertTo(self, format):
    """Return the data converted to format

    For 'text/html' sources the images linked by the result are included
    in the returned archive.

    - format: target format, as understood by the temporary document

    Raise ConversionError if the temporary document does not allow format.
    """
    if not self.ooo.isTargetFormatAllowed(format):
      raise ConversionError('Format not allowed %s' % format)
    mime, data = self.ooo.convert(format)
    if self.mimetype == 'text/html':
      data = self.includeImageList(data)
    return data