diff --git a/product/ERP5/Document/Document.py b/product/ERP5/Document/Document.py index bed643c0d8574be71bd1d238375e8bae838b029b..b5addf5650f246018210e5696fd13e4152e78c39 100644 --- a/product/ERP5/Document/Document.py +++ b/product/ERP5/Document/Document.py @@ -28,6 +28,7 @@ from DateTime import DateTime from operator import add +import re from AccessControl import ClassSecurityInfo, getSecurityManager from Acquisition import aq_base @@ -37,9 +38,10 @@ from Products.ERP5Type import Permissions, PropertySheet, Constraint, Interface from Products.ERP5Type.XMLObject import XMLObject from Products.ERP5Type.WebDAVSupport import TextContent from Products.ERP5Type.Message import Message +from Products.ERP5Type.Utils import convertToUpperCase, convertToMixedCase _MARKER = [] -VALID_ORDER_KEY_LIST = ('user', 'content', 'file_name', 'input') +VALID_ORDER_KEY_LIST = ('user_login', 'content', 'file_name', 'input') def makeSortedTuple(kw): items = kw.items() @@ -262,7 +264,7 @@ class Document(XMLObject): * Document_getPropertyDictFromContent - analyzes document content and returns properties which should be set on the document - * Base_getImplicitSuccesorValueList - finds appropriate all documents + * Base_getImplicitSuccessorValueList - finds appropriate all documents referenced in the current content * Base_getImplicitPredecessorValueList - finds document predecessors based on @@ -317,7 +319,8 @@ class Document(XMLObject): ### Content processing methods - def index_html(self, REQUEST, RESPONSE, format=None, **kw): + security.declareProtected(Permissions.View, 'index_html') + def index_html(self, REQUEST, RESPONSE, format=None, force=0, **kw): """ We follow here the standard Zope API for files and images and extend it to support format conversion. The idea @@ -331,36 +334,57 @@ class Document(XMLObject): withing the layout of a Web Site or withing a standard ERP5 page. Please refer to the index_html of TextDocument. - format - the format specified in the form of an extension + Should return appropriate format (calling convert + if necessary) and set headers. + + format - the format specied in the form of an extension + string (ex. jpeg, html, text, txt, etc.) + force - convert doc even if it has a cached version which seems to be up2date + **kw can be various things - e.g. resolution + + """ + pass + + security.declareProtected(Permissions.View, 'convert') + def convert(self, format, **kw): + """ + Main content conversion function, returns result which should + be returned and stored in cache. + format - the format specied in the form of an extension string (ex. jpeg, html, text, txt, etc.) + **kw can be various things - e.g. resolution """ pass security.declareProtected(Permissions.View, 'getSearchableText') def getSearchableText(self, md=None): """ - Used by the catalog for basic full text indexing. - Uses searchable_property_list attribute to put together various properties - of the document into one searchable text string. + Used by the catalog for basic full text indexing. + Uses searchable_property_list attribute to put together various properties + of the document into one searchable text string. - XXX-JPS - This method is nice. It should probably be moved to Base class - searchable_property_list could become a standard class attribute. + XXX-JPS - This method is nice. It should probably be moved to Base class + searchable_property_list could become a standard class attribute. - TODO (future): Make this property a per portal type property. + TODO (future): Make this property a per portal type property. """ def getPropertyListOrValue(property): """ - we try to get a list, else we get value and convert to list + we try to get a list, else we get value and convert to list """ val = self.getPropertyList(property) if val is None: val = self.getProperty(property) if val is not None and val != '': - val=[val] + val = [val] + else: + val = [] + else: + val = list(val) return val - - searchable_text = reduce(add, map(lambda x: self.getPropertyListOrValue(x) or ' ', + searchable_text = reduce(add, map(lambda x: getPropertyListOrValue(x), self.searchable_property_list)) + searchable_text = ' '.join(searchable_text) return searchable_text # Compatibility with CMF Catalog @@ -377,7 +401,7 @@ class Document(XMLObject): preferences. """ text = self.getSearchableText() - regexp = self.getPreferredReferenceLookupRegexp() + regexp = self.portal_preferences.getPreferredDocumentReferenceRegularExpression() try: rx_search = re.compile(regexp) except TypeError: # no regexp in preference @@ -390,11 +414,11 @@ class Document(XMLObject): security.declareProtected(Permissions.View, 'getImplicitSuccessorValueList') def getImplicitSuccessorValueList(self): """ - Find objects which we are referencing (if our text_content contains - references of other documents). The whole implementation is delegated to - Document_getImplicitSuccessorValueList script. + Find objects which we are referencing (if our text_content contains + references of other documents). The whole implementation is delegated to + Base_getImplicitSuccessorValueList script. - The implementation goes in 2 steps: + The implementation goes in 2 steps: - Step 1: extract with a regular expression a list of distionaries with various parameters such as @@ -414,22 +438,19 @@ class Document(XMLObject): later stage of the implementation. """ # XXX results should be cached as volatile attributes - # XXX-JPS - Please use TransactionCache in ERP5Type for this - # TransactionCache does all the work for you - lst = [] - for ref in self.getSearchableReferenceList(): - r = ref[1] - res = self.Document_findImplicitSuccessor(**r) - if len(res)>0: - lst.append(res[0].getObject()) - return lst + refs = [r[1] for r in self.getSearchableReferenceList()] + res = self.Base_getImplicitSuccessorValueList(refs) + # get unique latest (most relevant) versions + res = [r.getObject().getLatestVersionValue() for r in res] + res_dict = dict.fromkeys(res) + return res_dict.keys() security.declareProtected(Permissions.View, 'getImplicitPredecessorValueList') def getImplicitPredecessorValueList(self): """ This function tries to find document which are referencing us - by reference only, or by reference/language etc. Implementation is passed to - Document_getImplicitPredecessorValueList + Base_getImplicitPredecessorValueList The script should proceed in two steps: @@ -445,11 +466,13 @@ class Document(XMLObject): later stage of the implementation. """ # XXX results should be cached as volatile attributes - method = self._getTypeBasedMethod('findImplicitPredecessorList', - fallback_script_id = 'Document_findImplicitPredecessorList') + method = self._getTypeBasedMethod('getImplicitPredecessorValueList', + fallback_script_id = 'Base_getImplicitPredecessorValueList') lst = method() - lst = [r.getObject() for r in lst] - di = dict.fromkeys(lst) # make it unique + # make it unique first time (before getting lastversionvalue) + di = dict.fromkeys([r.getObject() for r in lst]) + # then get latest version and make unique again + di = dict.fromkeys([o.getLatestVersionValue() for o in di.keys()]) ref = self.getReference() return [o for o in di.keys() if o.getReference() != ref] # every object has its own reference in SearchableText @@ -464,7 +487,7 @@ class Document(XMLObject): return [] security.declareProtected(Permissions.View, 'getSimilarCloudValueList') - def getSimilarCloudValueList(self): + def getSimilarCloudValueList(self, depth=0): """ Returns all documents which are similar to us, directly or indirectly, and in both directions. In other words, it is a transitive closure of similar @@ -473,45 +496,83 @@ class Document(XMLObject): lista = {} depth = int(depth) - gettername = 'get%sValueList' % upperCase(category) - relatedgettername = 'get%sRelatedValueList' % upperCase(category) + #gettername = 'get%sValueList' % convertToUpperCase(category) + #relatedgettername = 'get%sRelatedValueList' % convertToUpperCase(category) - def getRelatedList(self, level=0): + def getRelatedList(ob, level=0): level += 1 - getter = getattr(self, gettername) - relatedgetter = getattr(self, relatedgettername) - res = getter() + relatedgetter() + #getter = getattr(self, gettername) + #relatedgetter = getattr(self, relatedgettername) + #res = getter() + relatedgetter() + res = ob.getSimilarValueList() + ob.getSimilarRelatedValueList() for r in res: if lista.get(r) is None: lista[r] = True # we use dict keys to ensure uniqueness if level != depth: getRelatedList(r, level) - getRelatedList(context) + getRelatedList(self) lista_latest = {} for o in lista.keys(): lista_latest[o.getLatestVersionValue()] = True # get latest versions avoiding duplicates again - if lista_latest.has_key(context): lista_latest.pop(context) # remove this document - if lista_latest.has_key(context.getLatestVersionValue()): lista_latest.pop(contextLatestVersionValue()) # remove this document + if lista_latest.has_key(self): lista_latest.pop(self) # remove this document + if lista_latest.has_key(self.getLatestVersionValue()): lista_latest.pop(self()) # remove this document return lista_latest.keys() + security.declareProtected(Permissions.View, 'hasFile') + def hasFile(self): + """ + Checks whether we have an initial file + """ + _marker = [] + if getattr(self,'data', _marker) is not _marker: # XXX-JPS - use propertysheet accessors + return getattr(self, 'data') is not None + return False + ### Version and language getters - might be moved one day to a mixin class in base security.declareProtected(Permissions.View, 'getLatestVersionValue') def getLatestVersionValue(self, language=None): """ - Tries to find the latest version with the latest revions - of self which the current user is allowed to access. + Tries to find the latest version with the latest revision + of self which the current user is allowed to access. - If language is provided, return the latest document - in the language. + If language is provided, return the latest document + in the language. - If language is not provided, return the latest version - in any language or in the user language if the version is - the same. + If language is not provided, return the latest version + in original language or in the user language if the version is + the same. """ - # Use portal_catalog - pass + catalog = getToolByName(self, 'portal_catalog', None) + kw = dict(reference=self.getReference(), sort_on=(('version','descending'),('revision','descending'),)) + if language is not None: kw['language'] = language + res = catalog(**kw) + + original_language = self.getOriginalLanguage() + user_language = self.Localizer.get_selected_language() + + # if language was given return it + if language is not None: + return res[0] + else: + first = res[0] + in_original = None + for ob in res: + if ob.getLanguage() == original_language: + # this is in original language + in_original = ob + if ob.getVersion() != first.getVersion(): + # we are out of the latest version - return in_original or first + if in_original is not None: + return in_original + else: + return first # this shouldn't happen in real life + if ob.getLanguage() == user_language: + # we found it in the user language + return ob + # this is the only doc in this version + return self security.declareProtected(Permissions.View, 'getVersionValueList') def getVersionValueList(self, version=None, language=None): @@ -520,55 +581,103 @@ class Document(XMLObject): but different version and given language or any language if not given. """ catalog = getToolByName(self, 'portal_catalog', None) - return catalog(portal_type=self.getPortalType(), + kw = dict(portal_type=self.getPortalType(), reference=self.getReference(), - version=version, - language=language, group_by=('revision',), order_by=(('revision', 'descending', 'SIGNED'),) ) + if version: kw['version'] = version + if language: kw['language'] = language + return catalog(**kw) security.declareProtected(Permissions.View, 'isVersionUnique') def isVersionUnique(self): """ Returns true if no other document of the same portal_type and reference has the same version and language + + XXX should delegate to script with proxy roles """ catalog = getToolByName(self, 'portal_catalog', None) - return catalog.countResults(portal_type=self.getPortalType(), + # XXX why this does not work??? + #return catalog.countResults(portal_type=self.getPortalType(), + #reference=self.getReference(), + #version=self.getVersion(), + #language=self.getLanguage(), + #) <= 1 + return len(catalog(portal_type=self.getPortalType(), reference=self.getReference(), version=self.getVersion(), language=self.getLanguage(), - ) <= 1 + )) <= 1 security.declareProtected(Permissions.View, 'getLatestRevisionValue') def getLatestRevisionValue(self): """ Returns the latest revision of ourselves """ - # Use portal_catalog - pass + if not self._checkCompleteCoordinates(): + return None + catalog = getToolByName(self, 'portal_catalog', None) + res = catalog( + reference=self.getReference(), + language=self.getLanguage(), + version=self.getVersion(), + sort_on=(('revision','descending'),) + ) + if len(res) == 0: + return None + return res[0].getObject() security.declareProtected(Permissions.View, 'getRevisionValueList') def getRevisionValueList(self): """ Returns a list revision strings for a given reference, version, language + XXX should it return revision strings, or docs (as the func name would suggest)? """ # Use portal_catalog - pass + if not self._checkCompleteCoordinates(): + return [] + res = self.portal_catalog(reference=self.getReference(), + language=self.getLanguage(), + version=self.getVersion() + ) + d = {} + for r in res: + d[r.getRevision()] = True + revs = d.keys() + revs.sort(reverse=True) + return revs + + security.declarePrivate('_checkCompleteCoordinates') + def _checkCompleteCoordinates(self): + """ + test if the doc has all coordinates + """ + reference = self.getReference() + version = self.getVersion() + language = self.getLanguage() + return (reference and version and language) security.declareProtected(Permissions.ModifyPortalContent, 'setNewRevision') - def setNewRevision(self): + def setNewRevision(self, immediate_reindex=False): """ Set a new revision number automatically Delegates to ZMI script because revision numbering policies can be different. Should be called by interaction workflow upon appropriate action. + + Sometimes we should reindex immediately, to avoid other doc setting + the same revision (if revisions are global and there is heavy traffic) """ - # Use portal_catalog without security + # Use portal_catalog without security (proxy roles on scripts) method = self._getTypeBasedMethod('getNewRevision', fallback_script_id = 'Document_getNewRevision') new_rev = method() self.setRevision(new_rev) + if immediate_reindex: + self.immediateReindexObject() + else: + self.reindexObject() security.declareProtected(Permissions.View, 'getLanguageList') def getLanguageList(self, version=None): @@ -593,7 +702,14 @@ class Document(XMLObject): # Approach 2: use workflow analysis (delegate to script if necessary) # workflow analysis is the only way for multiple orginals # XXX - cache or set? - pass + reference = self.getReference() + if not reference: + return + catalog = getToolByName(self, 'portal_catalog', None) + res = catalog(reference=self.getReference(), sort_on=(('creation_date','ascending'),)) + # XXX this should be security-unaware - delegate to script with proxy roles + return res[0].getLanguage() # XXX what happens if it is empty? + ### Property getters # Property Getters are document dependent so that we can @@ -619,8 +735,9 @@ class Document(XMLObject): """ # XXX this method should first make sure we have text content # or do a conversion - return self._getTypeBasedMethod('getPropertyDictFromContent', + method = self._getTypeBasedMethod('getPropertyDictFromContent', fallback_script_id='Document_getPropertyDictFromContent') + return method() security.declareProtected(Permissions.ModifyPortalContent,'getPropertyDictFromFileName') def getPropertyDictFromFileName(self, file_name): @@ -648,25 +765,26 @@ class Document(XMLObject): # disappear within a given transaction return kw + ### Metadata disovery and ingestion methods security.declareProtected(Permissions.ModifyPortalContent, 'discoverMetadata') def discoverMetadata(self, file_name=None, user_login=None): """ - This is the main metadata discovery function - controls the process - of discovering data from various sources. The discovery itself is - delegated to scripts or uses preferences-configurable regexps. + This is the main metadata discovery function - controls the process + of discovering data from various sources. The discovery itself is + delegated to scripts or uses preferences-configurable regexps. - file_name - this parameter is a file name of the form "AA-BBB-CCC-223-en" + file_name - this parameter is a file name of the form "AA-BBB-CCC-223-en" - user_login - this is a login string of a person; can be None if the user is - currently logged in, then we'll get him from session + user_login - this is a login string of a person; can be None if the user is + currently logged in, then we'll get him from session """ # Get the order # Preference is made of a sequence of 'user_login', 'content', 'file_name', 'input' method = self._getTypeBasedMethod('getPreferredDocumentMetadataDiscoveryOrderList', fallback_script_id = 'Document_getPreferredDocumentMetadataDiscoveryOrderList') - order_list = method() + order_list = list(method()) # Start with everything until content content_index = order_list.index('content') @@ -686,10 +804,15 @@ class Document(XMLObject): result = method(file_name) else: result = method() - kw.update(result) + if result is not None: + kw.update(result) # Edit content - self.edit(kw) + try: + del(kw['portal_type']) + except KeyError: + pass + self.edit(**kw) # Finish in second stage self.activate().finishMetadataDiscovery() @@ -697,20 +820,20 @@ class Document(XMLObject): security.declareProtected(Permissions.ModifyPortalContent, 'finishMetadataDiscovery') def finishMetadataDiscovery(self): """ - This is called by portal_activities, to leave time-consuming procedures - for later. It converts the OOoDocument (later maybe some other formats) and - does things that can be done only after it is converted). + This is called by portal_activities, to leave time-consuming procedures + for later. It converts what needs conversion to base, and + does things that can be done only after it is converted). """ # Get the order from preferences # Preference is made of a sequence of 'user_login', 'content', 'file_name', 'input' method = self._getTypeBasedMethod('getPreferredDocumentMetadataDiscoveryOrderList', fallback_script_id = 'Document_getPreferredDocumentMetadataDiscoveryOrderList') - order_list = method() + order_list = list(method()) # Start with everything until content content_index = order_list.index('content') - # Start with everything until content - build a dictionnary according to the order + # do content and everything that is later kw = {} for order_id in order_list[content_index:]: if order_id not in VALID_ORDER_KEY_LIST: @@ -724,10 +847,11 @@ class Document(XMLObject): result = method(file_name) else: result = method() - kw.update(result) + if result is not None: + kw.update(result) # Edit content - self.edit(kw) + self.edit(**kw) # Erase backup attributes delattr(self, '_backup_input')