# -*- coding: utf-8 -*- ############################################################################## # # Copyright (c) 2002 Nexedi SARL and Contributors. All Rights Reserved. # Jean-Paul Smets-Solanes <jp@nexedi.com> # # WARNING: This program as such is intended to be used by professional # programmers who take the whole responsability of assessing all potential # consequences resulting from its eventual inadequacies and bugs # End users who are looking for a ready-to-use solution with commercial # garantees and support are strongly adviced to contract a Free Software # Service Company # # This program is Free Software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
#
##############################################################################

from AccessControl.ZopeGuards import guarded_getattr
from AccessControl import ClassSecurityInfo
from zLOG import LOG, WARNING
from Products.ERP5Type.Base import WorkflowMethod
from Products.CMFCore.utils import getToolByName
from Products.CMFCore.utils import _setCacheHeaders, _ViewEmulator
from Products.ERP5Type import Permissions, PropertySheet
from Products.ERP5.Document.Document import Document, ConversionError,\
                                            NotConvertedError
from Products.ERP5.Document.File import File
from Products.ERP5Type.WebDAVSupport import TextContent
import re
import md5

# Mixin Import
from Products.ERP5.mixin.cached_convertable import CachedConvertableMixin
from Products.ERP5.mixin.base_convertable import BaseConvertableMixin
from Products.ERP5.mixin.base_convertable_and_file import BaseConvertableAndFileMixin

try:
    from string import Template
except ImportError:
    from Products.ERP5Type.patches.string import Template

# Text format assumed by convert() when the document does not define one.
DEFAULT_TEXT_FORMAT = 'text/html'


class TextDocument(BaseConvertableAndFileMixin, CachedConvertableMixin,
                   BaseConvertableMixin, TextContent, File):
    """A TextDocument implements IDocument, IFile, IBaseConvertable,
    ICachedconvertable and ITextConvertable

    The text is stored in the "text_content" property.  convert() renders
    it to another format through portal_transforms, and the rendered text
    can go through a ``string.Template`` substitution whose mapping is
    provided by a configurable method of the document.
    """

    meta_type = 'ERP5 Text Document'
    portal_type = 'Text Document'
    add_permission = Permissions.AddPortalContent

    # Declarative security
    security = ClassSecurityInfo()
    security.declareObjectProtected(Permissions.AccessContentsInformation)

    # Declarative properties
    property_sheets = (
        PropertySheet.Base,
        PropertySheet.XMLObject,
        PropertySheet.CategoryCore,
        PropertySheet.DublinCore,
        PropertySheet.Version,
        PropertySheet.Document,
        PropertySheet.Snapshot,
        PropertySheet.ExternalDocument,
        PropertySheet.Url,
        PropertySheet.TextDocument,
        PropertySheet.Data,
        PropertySheet.Reference,
    )

    def _substituteTextContent(self, text, safe_substitute=True, **kw):
        # Apply the document's substitution mapping, if any, to ``text``.
        #
        # The mapping is obtained by calling, with ``kw``, the method whose
        # id is returned by getTextContentSubstitutionMappingMethodId().
        # When no method id is configured, or when looking up / calling the
        # method raises AttributeError, ``text`` is returned unchanged.
        #
        # ``safe_substitute`` selects Template.safe_substitute over
        # Template.substitute.
        #
        # Everything is decoded into unicode before the substitution, in
        # order to avoid encoding errors; a str input is returned as a
        # utf-8 encoded str again.
        method_id = self.getTextContentSubstitutionMappingMethodId()
        if not method_id:
            return text

        try:
            mapping = guarded_getattr(self, method_id)(**kw)
        except AttributeError:
            LOG('TextDocument', WARNING, 'could not get the substitution'
                ' mapping method %s from %r, so the content will not be'
                ' substituted.' % (method_id, self.getRelativeUrl()))
            return text

        is_str = isinstance(text, str)
        if is_str:
            text = text.decode('utf-8')

        unicode_mapping = {}
        for key, value in mapping.iteritems():
            if isinstance(value, str):
                value = value.decode('utf-8')
            elif not isinstance(value, unicode):
                # Neither str nor unicode: go through its str() form.
                value = str(value).decode('utf-8')
            unicode_mapping[key] = value

        template = Template(text)
        if safe_substitute:
            text = template.safe_substitute(unicode_mapping)
        else:
            text = template.substitute(unicode_mapping)

        # If the original was a str, convert it back to str.
        if is_str:
            text = text.encode('utf-8')
        return text

    security.declareProtected(Permissions.View, 'asSubjectText')
    def asSubjectText(self, substitution_method_parameter_dict=None,
                      safe_substitute=True, **kw):
        """
          Converts the subject of the document to a textual representation.

          The inherited asSubjectText is called with ``kw``; its result is
          then passed through the text content substitution, using
          ``substitution_method_parameter_dict`` (None means no parameter)
          as keyword arguments of the substitution mapping method.
        """
        subject = TextDocument.inheritedAttribute('asSubjectText')(self, **kw)
        if substitution_method_parameter_dict is None:
            substitution_method_parameter_dict = {}
        return self._substituteTextContent(
            subject, safe_substitute=safe_substitute,
            **substitution_method_parameter_dict)

    security.declareProtected(Permissions.AccessContentsInformation,
                              'convert')
    def convert(self, format, substitution_method_parameter_dict=None,
                safe_substitute=True, charset=None, text_content=None, **kw):
        """
          Convert text using portal_transforms.

          format -- target format; it is resolved to a mime type by asking
                    mimetypes_registry for the extension 'name.<format>'.
                    The special value 'raw' returns the stored text as
                    'text/plain', without conversion nor substitution.
          substitution_method_parameter_dict -- keyword arguments for the
                    substitution mapping method (None means no parameter).
          safe_substitute -- forwarded to the substitution step.
          charset -- encoding of the text; when None it is searched in the
                    text with self.charset_parser.
          text_content -- text to convert; defaults to getTextContent().
          kw -- extra parameters identifying the stored conversion.

          Returns a (mime_type, data) tuple; data is '' when there is no
          text to convert.  Raises ConversionError if portal_transforms
          returns None.
        """
        # Accelerate rendering in Web mode
        _setCacheHeaders(_ViewEmulator().__of__(self), {'format': format})
        # Return the raw content
        if format == 'raw':
            return 'text/plain', self.getTextContent()

        portal = self.getPortalObject()
        mimetypes_registry = getToolByName(portal, 'mimetypes_registry')
        mime_type = str(mimetypes_registry.lookupExtension('name.%s' % format))
        # mime_type may be swapped below for the transform; callers always
        # get the mime type that was looked up for ``format``.
        original_mime_type = mime_type

        src_mimetype = self.getTextFormat(DEFAULT_TEXT_FORMAT)
        if not src_mimetype.startswith('text/'):
            src_mimetype = 'text/%s' % src_mimetype

        if text_content is None:
            # check if document has set text_content and convert if necessary
            text_content = self.getTextContent()
        if not text_content:
            # text_content is not set, return empty string instead of None
            return original_mime_type, ''

        if self.hasConversion(format=format, **kw):
            # Only the stored data is used; the mime type returned to the
            # caller stays original_mime_type.
            _cached_mime_type, result = self.getConversion(format=format, **kw)
        else:
            portal_transforms = getToolByName(portal, 'portal_transforms')
            filename = self.getSourceReference(self.getTitleOrId())
            if mime_type == 'text/html':
                mime_type = 'text/x-html-safe'
            if charset is None:
                # find charset
                re_match = self.charset_parser.search(text_content)
                if re_match is not None:
                    charset = re_match.group('charset')
            if charset and charset not in ('utf-8', 'UTF-8'):
                try:
                    text_content = text_content.decode(charset).encode('utf-8')
                except (UnicodeDecodeError, LookupError):
                    # Best effort: keep the text and the declared charset
                    # as they are when the text cannot be re-encoded.
                    pass
                else:
                    # Override charset if conversion succeeds
                    charset = 'utf-8'

                    # change charset value in html_document as well
                    def subCharset(matchobj):
                        keyword = matchobj.group('keyword')
                        if keyword:
                            # if keyword is present, replace charset just
                            # after
                            return keyword + 'utf-8'
                        # No keyword captured: hand the matched text back
                        # unchanged instead of falling through and giving
                        # None to re.sub.
                        return matchobj.group(0)

                    text_content = self.charset_parser.sub(subCharset,
                                                           text_content)
            result = portal_transforms.convertToData(mime_type, text_content,
                                                     object=self,
                                                     context=self,
                                                     filename=filename,
                                                     mimetype=src_mimetype,
                                                     encoding=charset)
            if result is None:
                raise ConversionError('TextDocument conversion error. '
                                      'portal_transforms failed to convert '
                                      'to %s: %r' % (mime_type, self))
            self.setConversion(result, original_mime_type, format=format,
                               **kw)

        if substitution_method_parameter_dict is None:
            substitution_method_parameter_dict = {}
        result = self._substituteTextContent(
            result, safe_substitute=safe_substitute,
            **substitution_method_parameter_dict)
        return original_mime_type, result

    def __call__(self):
        # Set the cache headers, then render through Document.__call__.
        _setCacheHeaders(_ViewEmulator().__of__(self), {})
        return Document.__call__(self)

    security.declareProtected(Permissions.AccessContentsInformation,
                              'getContentBaseURL')
    def getContentBaseURL(self):
        """
          Returns the content base URL based on the actual content
          (in HTML)

          The first match of self.base_parser in the HTML rendering of the
          document is returned; without base data, or without any match,
          Document.getContentBaseURL is used instead.
        """
        if self.hasBaseData():
            html = self._asHTML()
            base_list = re.findall(self.base_parser, str(html))
            if base_list:
                return base_list[0]
        return Document.getContentBaseURL(self)

    security.declareProtected(Permissions.AccessContentsInformation,
                              'hasBaseData')
    def hasBaseData(self):
        """
          A TextDocument stores its data in the "text_content" property.
          Since there is no such thing as base_data in TextDocument, having
          base_data is equivalent to having some text_content.
        """
        return self.hasTextContent()

    security.declareProtected(Permissions.AccessContentsInformation,
                              'getMimeTypeAndContent')
    def getMimeTypeAndContent(self):
        """This method returns a tuple which contains mimetype and content."""
        return (self.getTextFormat(), self.getTextContent())

    security.declareProtected(Permissions.ModifyPortalContent,
                              'updateContentMd5')
    def updateContentMd5(self):
        """Update md5 checksum from the original file

        The checksum is the hex digest of the text content, or None when
        there is no text content.

        XXX-JPS - this method is not part of any interface.
                  Should it be public or private? It is called
                  by some interaction workflow already. Is
                  it general or related to caching only?
        """
        data = self.getTextContent()
        if data is None:
            self._setContentMd5(None)
            return
        data = str(data)  # Useful for Pdata
        # A private setter is used: reindex is useless
        self._setContentMd5(md5.new(data).hexdigest())