diff --git a/product/ERP5/Document/Document.py b/product/ERP5/Document/Document.py
index 8f11e5db9ee7642a215ef9b91779a366b236dae4..da5ea5cee970813fdfe19e3b4826d345698969e5 100644
--- a/product/ERP5/Document/Document.py
+++ b/product/ERP5/Document/Document.py
@@ -394,6 +394,7 @@ class Document(XMLObject, UrlMixIn, ConversionCacheMixin, SnapshotMixin):
   body_parser = re.compile('<body[^>]*>(.*?)</body>', re.IGNORECASE + re.DOTALL)
   title_parser = re.compile('<title[^>]*>(.*?)</title>', re.IGNORECASE + re.DOTALL)
   base_parser = re.compile('<base[^>]*href=[\'"](.*?)[\'"][^>]*>', re.IGNORECASE + re.DOTALL)
+  charset_parser = re.compile(r'charset=["\']?([a-z0-9_\-]+)', re.IGNORECASE)  # captures the declared charset name; accepts either quote style and "_" (e.g. Shift_JIS)
 
   # Declarative security
   security = ClassSecurityInfo()
@@ -1083,14 +1084,20 @@ class Document(XMLObject, UrlMixIn, ConversionCacheMixin, SnapshotMixin):
       (without html and body tags, etc.) which can be used to inline
       a preview of the document.
     """
-    if self.hasConversion(format='stripped-html'):
+    if self.hasConversion(format='stripped-html'): # XXX this is redundant since we never set it
       mime, data = self.getConversion(format='stripped-html')
       return data
     mime, html = self.convert(format='html')
     body_list = re.findall(self.body_parser, str(html))
     if len(body_list):
-      return body_list[0]
-    return html
+      stripped_html = body_list[0]
+    else:
+      stripped_html = html
+    # find charset and convert to utf-8
+    charset_list = self.charset_parser.findall(html)
+    if charset_list:
+      stripped_html = unicode(stripped_html, charset_list[0]).encode('utf-8')
+    return stripped_html
 
   security.declareProtected(Permissions.AccessContentsInformation, 'getContentInformation')
   def getContentInformation(self):