From 4dc78702a103c8367a47b0135fba9a91bca3c0d5 Mon Sep 17 00:00:00 2001
From: Romain Courteaud <romain@nexedi.com>
Date: Thu, 3 Jul 2008 09:55:20 +0000
Subject: [PATCH] Handle inconsistent encoding in mail message (in case
 declared charset is different from the one used).

git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@22238 20353a03-c40f-0410-a6d1-a30d3c3de9de
---
 product/ERP5/Document/Document.py      | 6 +++++-
 product/ERP5/Document/EmailDocument.py | 5 ++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/product/ERP5/Document/Document.py b/product/ERP5/Document/Document.py
index de091186fa..3a6bb9d0d9 100644
--- a/product/ERP5/Document/Document.py
+++ b/product/ERP5/Document/Document.py
@@ -1274,7 +1274,11 @@ class Document(PermanentURLMixIn, XMLObject, UrlMixIn, ConversionCacheMixin, Sna
     charset_list = self.charset_parser.findall(str(html)) # XXX - Not efficient is datastream 
                                                           # instance but hard to do better
     if charset_list and charset_list[0] not in ('utf-8', 'UTF-8'):
-      stripped_html = unicode(str(stripped_html), charset_list[0]).encode('utf-8')
+      try:
+        stripped_html = unicode(str(stripped_html), 
+                                charset_list[0]).encode('utf-8')
+      except UnicodeDecodeError:
+        return str(stripped_html)
     return stripped_html
 
   security.declareProtected(Permissions.AccessContentsInformation, 'getContentInformation')
diff --git a/product/ERP5/Document/EmailDocument.py b/product/ERP5/Document/EmailDocument.py
index 38a91e183b..29fec853c2 100644
--- a/product/ERP5/Document/EmailDocument.py
+++ b/product/ERP5/Document/EmailDocument.py
@@ -277,7 +277,10 @@ class EmailDocument(File, TextDocument):
       elif part.get_content_type() == 'text/html' and not html_result and not part.is_multipart():
         part_encoding = part.get_content_charset()
         if part_encoding not in (None, 'utf-8',):
-          return part.get_payload(decode=1).decode(part_encoding).encode('utf-8')
+          try:
+            return part.get_payload(decode=1).decode(part_encoding).encode('utf-8')
+          except UnicodeDecodeError:
+            return part.get_payload(decode=1)
         return part.get_payload(decode=1)
     return text_result
 
-- 
2.30.9