# -*- coding: utf-8 -*- ############################################################################## # # Copyright (c) 2009 Nexedi SA and Contributors. All Rights Reserved. # Jean-Paul Smets-Solanes <jp@nexedi.com> # # WARNING: This program as such is intended to be used by professional # programmers who take the whole responsability of assessing all potential # consequences resulting from its eventual inadequacies and bugs # End users who are looking for a ready-to-use solution with commercial # garantees and support are strongly adviced to contract a Free Software # Service Company # # This program is Free Software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # ############################################################################## import md5 import string import xmlrpclib, base64, re, zipfile, cStringIO from xmlrpclib import Fault from xmlrpclib import Transport from xmlrpclib import SafeTransport from Acquisition import aq_base from AccessControl import ClassSecurityInfo from Products.ERP5Type import Permissions from Products.CMFCore.utils import getToolByName from Products.ERP5Type.Cache import DEFAULT_CACHE_SCOPE from Products.ERP5Type.Base import WorkflowMethod from zLOG import LOG from Products.ERP5Type.Cache import CachingMethod # Mixin import from Products.ERP5.mixin.convertable import ConvertableMixin from Products.ERP5.mixin.cached_convertable import CachedConvertableMixin class HTMLConvertableMixin(CachedConvertableMixin): """ This class provides a generic implementation of IHTMLConvertable. """ # Declarative security security = ClassSecurityInfo() # Declarative security security = ClassSecurityInfo() security.declareObjectProtected(Permissions.AccessContentsInformation) security.declarePrivate('_asHTML') def _asHTML(self, **kw): """ A private method which converts to HTML. This method is the one to override in subclasses. """ if not self.hasBaseData(): raise ConversionError('This document has not been processed yet.') try: # FIXME: no substitution may occur in this case. mime, data = self.getConversion(format='base-html') return data except KeyError: kw['format'] = 'html' mime, html = self.convert(**kw) return html security.declareProtected(Permissions.View, 'asEntireHTML') def asEntireHTML(self, **kw): """ Returns a complete HTML representation of the document (with body tags, etc.). Adds if necessary a base tag so that the document can be displayed in an iframe or standalone. Actual conversion is delegated to _asHTML """ html = self._asHTML(**kw) if self.getUrlString(): # If a URL is defined, add the base tag # if base is defined yet. html = str(html) if not html.find('<base') >= 0: base = '<base href="%s">' % self.getContentBaseURL() html = html.replace('<head>', '<head>%s' % base) self.setConversion(html, mime='text/html', format='base-html') return html security.declareProtected(Permissions.View, 'asStrippedHTML') def asStrippedHTML(self, **kw): """ Returns a stripped HTML representation of the document (without html and body tags, etc.) which can be used to inline a preview of the document. """ if not self.hasBaseData(): return '' try: # FIXME: no substitution may occur in this case. mime, data = self.getConversion(format='stripped-html') return data except KeyError: kw['format'] = 'html' mime, html = self.convert(**kw) return self._stripHTML(str(html)) def _guessEncoding(self, string): """ Try to guess the encoding for this string. Returns None if no encoding can be guessed. """ try: import chardet except ImportError: return None return chardet.detect(string).get('encoding', None) def _stripHTML(self, html, charset=None): """ A private method which can be reused by subclasses to strip HTML content """ body_list = re.findall(self.body_parser, str(html)) if len(body_list): stripped_html = body_list[0] else: stripped_html = html # find charset and convert to utf-8 charset_list = self.charset_parser.findall(str(html)) # XXX - Not efficient if this # is datastream instance but hard to do better if charset and not charset_list: # Use optional parameter is we can not find encoding in HTML charset_list = [charset] if charset_list and charset_list[0] not in ('utf-8', 'UTF-8'): try: stripped_html = unicode(str(stripped_html), charset_list[0]).encode('utf-8') except (UnicodeDecodeError, LookupError): return str(stripped_html) return stripped_html