create a Mixin out of EmailDocument

This mixin contains all the methods returning values based on the mail message itself contained in the data property of the EmailDocument.

create a Mixin out of EmailDocument
This mixin contains all the methods returning values based on the mail message itself contained in the data property of the EmailDocument.
d41e23bd · Nicolas Wavrant · 997ab603 · d41e23bd · d41e23bd
Commit d41e23bd authored Mar 28, 2018 by Nicolas Wavrant
Show whitespace changes
Inline Side-by-side

Showing with 305 additions and 237 deletions

product/ERP5/Document/EmailDocument.py product/ERP5/Document/EmailDocument.py +4 -237

product/ERP5/mixin/mail_message.py product/ERP5/mixin/mail_message.py +301 -0

No files found.
--- a/product/ERP5/Document/EmailDocument.py
+++ b/product/ERP5/Document/EmailDocument.py
@@ -37,6 +37,7 @@ from Products.ERP5Type import Permissions, PropertySheet
 from Products.ERP5.Document.TextDocument import TextDocument
 from Products.ERP5.Document.File import File
 from Products.ERP5.Document.Document import ConversionError
+from Products.ERP5.mixin.mail_message import MailMessageMixin, testCharsetAndConvert
 from Products.ERP5.mixin.document_proxy import DocumentProxyMixin, DocumentProxyError
 from Products.ERP5.Tool.NotificationTool import buildEmailMessage
 from Products.ERP5Type.Utils import guessEncodingFromText
@@ -122,7 +123,7 @@ for method_id in ('getContentType',
  setattr(EmailDocumentProxyMixin, method_id,
      ProxiedMethod(method_id))

-class EmailDocument(TextDocument):
+class EmailDocument(TextDocument, MailMessageMixin):
  """
    EmailDocument is a File which stores its metadata in a form which
    is similar to a TextDocument.
@@ -187,37 +188,6 @@ class EmailDocument(TextDocument):
    except AttributeError:
      pass

-  def _getMessageTextPart(self):
-    """
-    Return the main text part of the message data
-
-    Based on rfc: http://tools.ietf.org/html/rfc2046#section-5.1.4)
-    """
-    # Default value if no text is found
-    found_part = None
-
-    part_list = [self._getMessage()]
-    while part_list:
-      part = part_list.pop(0)
-      if part.is_multipart():
-        if part.get_content_subtype() == 'alternative':
-          # Try to get the favourite text format defined on preference
-          preferred_content_type = self.getPortalObject().portal_preferences.\
-                                         getPreferredTextFormat('text/html')
-          favourite_part = None
-          for subpart in part.get_payload():
-            if subpart.get_content_type() == preferred_content_type:
-              part_list.insert(0, subpart)
-            else:
-              part_list.append(subpart)
-        else:
-          part_list.extend(part.get_payload())
-      elif part.get_content_maintype() == 'text':
-        found_part = part
-        break
-
-    return found_part
-
  security.declareProtected(Permissions.AccessContentsInformation,
                            'isSupportBaseDataConversion')
  def isSupportBaseDataConversion(self):
@@ -225,193 +195,6 @@ class EmailDocument(TextDocument):
    """
    return False

-  security.declareProtected(Permissions.AccessContentsInformation, 'getContentInformation')
-  def getContentInformation(self):
-    """
-    Returns the content information from the header information.
-    This is used by the metadata discovery system.
-
-    Header information is converted in UTF-8 since this is the standard
-    way of representing strings in ERP5.
-    """
-    result = {}
-    for (name, value) in self._getMessage().items():
-      try:
-        decoded_header = decode_header(value)
-      except HeaderParseError, error_message:
-        decoded_header = ()
-        LOG('EmailDocument.getContentInformation', INFO,
-            'Failed to decode %s header of %s with error: %s' %
-            (name, self.getPath(), error_message))
-      for text, encoding in decoded_header:
-        text, encoding = testCharsetAndConvert(text, 'text/plain', encoding)
-        if name in result:
-          result[name] = '%s %s' % (result[name], text)
-        else:
-          result[name] = text
-    return result
-
-  security.declareProtected(Permissions.AccessContentsInformation, 'getAttachmentInformationList')
-  def getAttachmentInformationList(self, **kw):
-    """
-    Returns a list of dictionnaries for every attachment. Each dictionnary
-    represents the metadata of the attachment.
-    **kw - support for listbox (TODO: improve it)
-    """
-    result = []
-    for i, part in enumerate(self._getMessage().walk()):
-      if not part.is_multipart():
-        kw = dict(part.items())
-        kw['uid'] = 'part_%s' % i
-        kw['index'] = i
-        filename = part.get_filename()
-        if not filename:
-          # get_filename return name only from Content-Disposition header
-          # of the message but sometimes this value is stored in
-          # Content-Type header
-          content_type_header = kw.get('Content-Type',
-                                                    kw.get('Content-type', ''))
-          filename_list = re.findall(filename_regexp,
-                                      content_type_header,
-                                      re.MULTILINE)
-          if filename_list:
-            filename = filename_list[0]
-        if filename:
-          kw['filename'] = filename
-        else:
-          content_disposition = kw.get('Content-Disposition',
-                                           kw.get('Content-disposition', None))
-          prefix = 'part_'
-          if content_disposition:
-            if content_disposition.split(';')[0] == 'attachment':
-              prefix = 'attachment_'
-            elif content_disposition.split(';')[0] == 'inline':
-              prefix = 'inline_'
-          kw['filename'] = '%s%s' % (prefix, i)
-        kw['content_type'] = part.get_content_type()
-        result.append(kw)
-    return result
-
-  security.declareProtected(Permissions.AccessContentsInformation, 'getAttachmentData')
-  def getAttachmentData(self, index, REQUEST=None):
-    """
-    Returns the decoded data of an attachment.
-    """
-    for i, part in enumerate(self._getMessage().walk()):
-      if index == i:
-        # This part should be handled in skin script
-        # but it was a bit easier to access items here
-        kw = dict(part.items())
-        content_type = part.get_content_type()
-        if REQUEST is not None:
-          filename = part.get_filename()
-          if not filename:
-            # get_filename return name only from Content-Disposition header
-            # of the message but sometimes this value is stored in
-            # Content-Type header
-            content_type_header = kw.get('Content-Type',
-                                                    kw.get('Content-type', ''))
-            filename_list = re.findall(filename_regexp,
-                                        content_type_header,
-                                        re.MULTILINE)
-            if filename_list:
-              filename = filename_list[0]
-          RESPONSE = REQUEST.RESPONSE
-          RESPONSE.setHeader('Accept-Ranges', 'bytes')
-          if content_type and filename:
-            RESPONSE.setHeader('Content-Type', content_type)
-            RESPONSE.setHeader('Content-disposition',
-                               'attachment; filename="%s"' % filename)
-        if 'text/html' in content_type:
-          part_encoding = part.get_content_charset()
-          message_text = part.get_payload(decode=1)
-          text_result, encoding = testCharsetAndConvert(message_text,
-                                                        content_type,
-                                                        part_encoding)
-          # Strip out html content in safe mode.
-          mime, content = self.convert(format='html',
-                                       text_content=text_result,
-                                       encoding=part_encoding,
-                                       index=index) # add index to generate
-                                       # a unique cache key per attachment
-        else:
-          content = part.get_payload(decode=1)
-        return content
-    return KeyError, "No attachment with index %s" % index
-
-  # Helper methods which override header property sheet
-  security.declareProtected(Permissions.AccessContentsInformation, 'getSender')
-  def getSender(self, *args):
-    """
-    """
-    if not self.hasData():
-      return self._baseGetSender(*args)
-    return self.getContentInformation().get('From', *args)
-
-  security.declareProtected(Permissions.AccessContentsInformation, 'getRecipient')
-  def getRecipient(self, *args):
-    """
-    """
-    if not self.hasData():
-      return self._baseGetRecipient(*args)
-    return self.getContentInformation().get('To', *args)
-
-  security.declareProtected(Permissions.AccessContentsInformation, 'getCcRecipient')
-  def getCcRecipient(self, *args):
-    """
-    """
-    if not self.hasData():
-      return self._baseGetCcRecipient(*args)
-    return self.getContentInformation().get('Cc', *args)
-
-  security.declareProtected(Permissions.AccessContentsInformation, 'getGroupingReference')
-  def getGroupingReference(self, *args):
-    """
-      The reference refers here to the Thread of messages.
-    """
-    if not self.hasData():
-      result = self._baseGetGroupingReference(*args)
-    else:
-      if not len(args):
-        args = (self._baseGetGroupingReference(),)
-      result = self.getContentInformation().get('References', *args)
-      if result:
-        result = result.split() # Only take the first reference
-        if result:
-          result = result[0]
-    if result:
-      return result
-    return self.getFilename(*args)
-
-  security.declareProtected(Permissions.AccessContentsInformation,
-                            'getSourceReference')
-  def getSourceReference(self, *args):
-    """
-      The Message-ID is considered here as the source reference
-      of the message on the sender side (source)
-    """
-    if not self.hasData():
-      return self._baseGetSourceReference(*args)
-    if not len(args):
-      args = (self._baseGetSourceReference(),)
-    content_information = self.getContentInformation()
-    return content_information.get('Message-ID') or content_information.get('Message-Id', *args)
-
-  security.declareProtected(Permissions.AccessContentsInformation, 'getDestinationReference')
-  def getDestinationReference(self, *args):
-    """
-      The In-Reply-To is considered here as the reference
-      of the thread on the side of a former sender (destination)
-
-      This is a hack which can be acceptable since
-      the reference of an email is shared.
-    """
-    if not self.hasData():
-      return self._baseGetDestinationReference(*args)
-    if not len(args):
-      args = (self._baseGetDestinationReference(),)
-    return self.getContentInformation().get('In-Reply-To', *args)
-
  # Overriden methods
  security.declareProtected(Permissions.AccessContentsInformation, 'getTitle')
  def getTitle(self, default=_MARKER):
@@ -504,7 +287,7 @@ class EmailDocument(TextDocument):
      else:
        return part.get_content_type()

-  email_parser = re.compile('[ ;,<>\'"]*([^<> ;,\'"]+?\@[^<> ;,\'"]+)[ ;,<>\'"]*',re.IGNORECASE)
+  email_parser = re.compile('[ ;,<>\'"]*([^<> ;,\'"]+?\@[^<> ;,\'"]+)[ ;,<>\'"]*', re.IGNORECASE)
  security.declareProtected(Permissions.AccessContentsInformation, 'getContentURLList')
  def getContentURLList(self):
    """
@@ -595,20 +378,4 @@ class EmailDocument(TextDocument):
  # getData must be implemented like File.getData is.
  security.declareProtected(Permissions.AccessContentsInformation, 'getData')
  getData = File.getData
-
-def testCharsetAndConvert(text_content, content_type, encoding):
-  try:
-    if encoding is not None:
-      text_content = text_content.decode(encoding).encode('utf-8')
-    else:
-      text_content = text_content.decode().encode('utf-8')
-  except (UnicodeDecodeError, LookupError), error_message:
-    encoding = guessEncodingFromText(text_content, content_type)
-    if encoding is not None:
-      try:
-        text_content = text_content.decode(encoding).encode('utf-8')
-      except (UnicodeDecodeError, LookupError):
-        text_content = repr(text_content)[1:-1]
-    else:
-      text_content = repr(text_content)[1:-1]
-  return text_content, encoding
+  getContentInformation = MailMessageMixin.getContentInformation
--- a/product/ERP5/mixin/mail_message.py
+++ b/product/ERP5/mixin/mail_message.py
+# -*- coding: utf-8 -*-
+##############################################################################
+#
+# Copyright (c) 2002-2018 Nexedi SA and Contributors. All Rights Reserved.
+#
+# WARNING: This program as such is intended to be used by professional
+# programmers who take the whole responsibility of assessing all potential
+# consequences resulting from its eventual inadequacies and bugs
+# End users who are looking for a ready-to-use solution with commercial
+# guarantees and support are strongly adviced to contract a Free Software
+# Service Company
+#
+# This program is Free Software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+#
+##############################################################################
+
+from AccessControl import ClassSecurityInfo
+from Products.ERP5Type.Globals import InitializeClass
+from Products.ERP5Type import Permissions
+from Products.ERP5Type.Utils import guessEncodingFromText
+from Products.ERP5.Document.TextDocument import TextDocument
+from zLOG import LOG, INFO
+
+from email.header import decode_header, HeaderParseError
+
+import re
+
+filename_regexp = 'name="([^"]*)"'
+
+def testCharsetAndConvert(text_content, content_type, encoding):
+  try:
+    if encoding is not None:
+      text_content = text_content.decode(encoding).encode('utf-8')
+    else:
+      text_content = text_content.decode().encode('utf-8')
+  except (UnicodeDecodeError, LookupError), error_message:
+    encoding = guessEncodingFromText(text_content, content_type)
+    if encoding is not None:
+      try:
+        text_content = text_content.decode(encoding).encode('utf-8')
+      except (UnicodeDecodeError, LookupError):
+        text_content = repr(text_content)[1:-1]
+    else:
+      text_content = repr(text_content)[1:-1]
+  return text_content, encoding
+
+
+class MailMessageMixin:
+  """
+  Contains methods to read email's content or metadata for the data property
+  of the self object.
+  Sub-classes should implement a self._getMessage function returning a python
+  instance of email.message.Message.
+  The methods of this Mixin were originally in the Product.ERP5.Document.EmailDocument
+  class, but have been moved in a mixin to be shared with InternetMessagePost.
+  """
+
+  # Declarative security
+  security = ClassSecurityInfo()
+  security.declareObjectProtected(Permissions.AccessContentsInformation)
+
+  def _getMessage(self):
+    """
+    Method to overwrite in children classes, as it is the base of all
+    other methods of the mixin.
+    It should return an instance of email.message.Message
+    """
+    raise NotImplementedError
+
+  def _getMessageTextPart(self):
+    """
+    Return the main text part of the message data
+
+    Based on rfc: http://tools.ietf.org/html/rfc2046#section-5.1.4)
+    """
+    # Default value if no text is found
+    found_part = None
+
+    part_list = [self._getMessage()]
+    while part_list:
+      part = part_list.pop(0)
+      if part.is_multipart():
+        if part.get_content_subtype() == 'alternative':
+          # Try to get the favourite text format defined on preference
+          preferred_content_type = self.getPortalObject().portal_preferences.\
+                                         getPreferredTextFormat('text/html')
+          favourite_part = None
+          for subpart in part.get_payload():
+            if subpart.get_content_type() == preferred_content_type:
+              part_list.insert(0, subpart)
+            else:
+              part_list.append(subpart)
+        else:
+          part_list.extend(part.get_payload())
+      elif part.get_content_maintype() == 'text':
+        found_part = part
+        break
+  
+    return found_part
+
+  security.declareProtected(Permissions.AccessContentsInformation, 'getContentInformation')
+  def getContentInformation(self):
+    """
+    Returns the content information from the header information.
+    This is used by the metadata discovery system.
+
+    Header information is converted in UTF-8 since this is the standard
+    way of representing strings in ERP5.
+    """
+    result = {}
+    for (name, value) in self._getMessage().items():
+      try:
+        decoded_header = decode_header(value)
+      except HeaderParseError, error_message:
+        decoded_header = ()
+        LOG('MailMessageMixin.getContentInformation', INFO,
+            'Failed to decode %s header of %s with error: %s' %
+            (name, self.getPath(), error_message))
+      for text, encoding in decoded_header:
+        text, encoding = testCharsetAndConvert(text, 'text/plain', encoding)
+        if name in result:
+          result[name] = '%s %s' % (result[name], text)
+        else:
+          result[name] = text
+    return result
+
+  security.declareProtected(Permissions.AccessContentsInformation, 'getAttachmentInformationList')
+  def getAttachmentInformationList(self, **kw):
+    """
+    Returns a list of dictionnaries for every attachment. Each dictionnary
+    represents the metadata of the attachment.
+    **kw - support for listbox (TODO: improve it)
+    """
+    result = []
+    for i, part in enumerate(self._getMessage().walk()):
+      if not part.is_multipart():
+        kw = dict(part.items())
+        kw['uid'] = 'part_%s' % i
+        kw['index'] = i
+        filename = part.get_filename()
+        if not filename:
+          # get_filename return name only from Content-Disposition header
+          # of the message but sometimes this value is stored in
+          # Content-Type header
+          content_type_header = kw.get('Content-Type',
+                                       kw.get('Content-type', ''))
+          filename_list = re.findall(filename_regexp,
+                                     content_type_header,
+                                     re.MULTILINE)
+          if filename_list:
+            filename = filename_list[0]
+        if filename:
+          kw['filename'] = filename
+        else:
+          content_disposition = kw.get('Content-Disposition',
+                                       kw.get('Content-disposition', None))
+          prefix = 'part_'
+          if content_disposition:
+            if content_disposition.split(';')[0] == 'attachment':
+              prefix = 'attachment_'
+            elif content_disposition.split(';')[0] == 'inline':
+              prefix = 'inline_'
+          kw['filename'] = '%s%s' % (prefix, i)
+        kw['content_type'] = part.get_content_type()
+        result.append(kw)
+    return result
+
+  security.declareProtected(Permissions.AccessContentsInformation, 'getAttachmentData')
+  def getAttachmentData(self, index, REQUEST=None):
+    """
+    Returns the decoded data of an attachment.
+    """
+    for i, part in enumerate(self._getMessage().walk()):
+      if index == i:
+        # This part should be handled in skin script
+        # but it was a bit easier to access items here
+        kw = dict(part.items())
+        content_type = part.get_content_type()
+        if REQUEST is not None:
+          filename = part.get_filename()
+          if not filename:
+            # get_filename return name only from Content-Disposition header
+            # of the message but sometimes this value is stored in
+            # Content-Type header
+            content_type_header = kw.get('Content-Type',
+                                         kw.get('Content-type', ''))
+            filename_list = re.findall(filename_regexp,
+                                       content_type_header,
+                                       re.MULTILINE)
+            if filename_list:
+              filename = filename_list[0]
+          RESPONSE = REQUEST.RESPONSE
+          RESPONSE.setHeader('Accept-Ranges', 'bytes')
+          if content_type and filename:
+            RESPONSE.setHeader('Content-Type', content_type)
+            RESPONSE.setHeader('Content-disposition',
+                               'attachment; filename="%s"' % filename)
+        if 'text/html' in content_type:
+          part_encoding = part.get_content_charset()
+          message_text = part.get_payload(decode=1)
+          text_result, encoding = testCharsetAndConvert(message_text,
+                                                        content_type,
+                                                        part_encoding)
+          # Strip out html content in safe mode.
+          mime, content = self.convert(format='html',
+                                       text_content=text_result,
+                                       encoding=part_encoding,
+                                       index=index) # add index to generate
+                                       # a unique cache key per attachment
+        else:
+          content = part.get_payload(decode=1)
+        return content
+    return KeyError, "No attachment with index %s" % index
+
+  # Helper methods which override header property sheet
+  security.declareProtected(Permissions.AccessContentsInformation, 'getSender')
+  def getSender(self, *args):
+    """
+    """
+    if not self.hasData():
+      return self._baseGetSender(*args)
+    return self.getContentInformation().get('From', *args)
+
+  security.declareProtected(Permissions.AccessContentsInformation, 'getRecipient')
+  def getRecipient(self, *args):
+    """
+    """
+    if not self.hasData():
+      return self._baseGetRecipient(*args)
+    return self.getContentInformation().get('To', *args)
+
+  security.declareProtected(Permissions.AccessContentsInformation, 'getCcRecipient')
+  def getCcRecipient(self, *args):
+    """
+    """
+    if not self.hasData():
+      return self._baseGetCcRecipient(*args)
+    return self.getContentInformation().get('Cc', *args)
+
+  security.declareProtected(Permissions.AccessContentsInformation, 'getGroupingReference')
+  def getGroupingReference(self, *args):
+    """
+      The reference refers here to the Thread of messages.
+    """
+    if not self.hasData():
+      result = self._baseGetGroupingReference(*args)
+    else:
+      if not len(args):
+        args = (self._baseGetGroupingReference(),)
+      result = self.getContentInformation().get('References', *args)
+      if result:
+        result = result.split()  # Only take the first reference
+        if result:
+          result = result[0]
+    if result:
+      return result
+    return self.getFilename(*args)
+
+  security.declareProtected(Permissions.AccessContentsInformation,
+                            'getSourceReference')
+  def getSourceReference(self, *args):
+    """
+      The Message-ID is considered here as the source reference
+      of the message on the sender side (source)
+    """
+    if not self.hasData():
+      return self._baseGetSourceReference(*args)
+    if not len(args):
+      args = (self._baseGetSourceReference(),)
+    content_information = self.getContentInformation()
+    return content_information.get('Message-ID') or content_information.get('Message-Id', *args)
+
+  security.declareProtected(Permissions.AccessContentsInformation, 'getDestinationReference')
+  def getDestinationReference(self, *args):
+    """
+      The In-Reply-To is considered here as the reference
+      of the thread on the side of a former sender (destination)
+
+      This is a hack which can be acceptable since
+      the reference of an email is shared.
+    """
+    if not self.hasData():
+      return self._baseGetDestinationReference(*args)
+    if not len(args):
+      args = (self._baseGetDestinationReference(),)
+    return self.getContentInformation().get('In-Reply-To', *args)
+
+
+InitializeClass(MailMessageMixin)