diff --git a/product/ERP5/Document/Document.py b/product/ERP5/Document/Document.py index 45048fafbee7d1d64fccbd18db16d435a1b0a10a..910ff73f2f3beeee053a6bf7b0a5d6ef6763660d 100644 --- a/product/ERP5/Document/Document.py +++ b/product/ERP5/Document/Document.py @@ -29,7 +29,7 @@ from DateTime import DateTime from operator import add -from AccessControl import ClassSecurityInfo +from AccessControl import ClassSecurityInfo, getSecurityManager from Products.ERP5Type import Permissions, PropertySheet, Constraint, Interface from Products.ERP5Type.XMLObject import XMLObject from Products.ERP5Type.WebDAVSupport import TextContent @@ -167,7 +167,7 @@ class Document(XMLObject): Document classes which implement conversion should use the ConversionCacheMixin class so that converted values are - stored. + stored inside ZODB and do not need to be recalculated. XXX IDEA - ISSUE: generic API for conversion. converted_document = document.convert(...) @@ -180,47 +180,73 @@ class Document(XMLObject): (1) portal type detection (2) object creation and upload of data (3) metadata discovery (optionally with conversion of data to another format) - (4) other possible actions + (4) other possible actions to finalise the ingestion (ex. by assigning + a reference) This class handles (3) and calls a ZMI script to do (4). Metadata can be drawn from various sources: - input - data supplied with http request or set on the object during (2) (e.g. - discovered from email text) - file_name - data which might be encoded in file name - user_login- information about user who is contributing the file - content - data which might be derived from document content + input - data supplied with http request or set on the object during (2) (e.g. + discovered from email text) + file_name - data which might be encoded in file name + user_login - information about user who is contributing the file + content - data which might be derived from document content If a certain property is defined in more than one source, it is set according to preference order returned by a script - Document_getPreferredDocumentMetadataDiscoveryOrderList (or type-based version). + Document_getPreferredDocumentMetadataDiscoveryOrderList + (or any type-based version since discovery is type dependent) + Methods for discovering metadata are: + getPropertyDictFromInput getPropertyDictFromFileName getPropertyDictFromUserLogin getPropertyDictFromContent - The Document class behaviour can be extended / customized through scripts - (which are type-based so can be adjusted per portal type). + Methods for processing content are implemented either in + Document class or in Base class: + + getSearchableReferenceList (Base) + getSearchableText (Base) + index_html (Document) + + Methods for handling relations are implemented either in + Document class or in Base class: + + getImplicitSuccessorValueList (Base) + getImplicitPredecessorValueList (Base) + getImplicitSimilarValueList (Base) + getSimilarCloudValueList (Document) + + Implicit relations consist in finding document references inside + searchable text (ex. INV-23456) and deducting relations from that. + Two customisable methods required. One to find a list of implicit references + inside the content (getSearchableReferenceList) and one to convert a given + document reference into a list of reference strings which could + be present in other content (asSearchableReferenceList). - * Document_getFilenameParsingRegexp - returns a regular expression for extracting - properties encoded in file name + document.getSearchableReferenceList() returns + [ + {'reference':' INV-12367'}, + {'reference': 'INV-1112', 'version':'012}', + {'reference': 'AB-CC-DRK', 'version':'011', 'language': 'en'} + ] - * Document_getReferenceLookupRegexp - returns a regular expression for finding - references to documents within document text content + The Document class behaviour can be extended / customized through scripts + (which are type-based so can be adjusted per portal type). - * Document_getPropertyListFromUser - finds a user (by user_login or from session) + * Document_getPropertyDictFromUserLogin - finds a user (by user_login or from session) and returns properties which should be set on the document - * Document_getPropertyListFromContent - analyzes document content and returns + * Document_getPropertyDictFromContent - analyzes document content and returns properties which should be set on the document - * Document_findImplicitSuccessor - finds appropriate version of a document - based on coordinates (which can be incomplete, depending if a document reference - found in text content contained version and/or language) + * Base_getImplicitSuccesorValueList - finds appropriate all documents + referenced in the current content - * Document_findImplicitPredecessorList - finds document predecessors based on + * Base_getImplicitPredecessorValueList - finds document predecessors based on the document coordinates (can use only complete coordinates, or also partial) * Document_getPreferredDocumentMetadataDiscoveryOrderList - returns an order @@ -230,11 +256,10 @@ class Document(XMLObject): is completed (and after document has been converted, so text_content is available if the document has it) - * Document_getNewRevisionNumber - calculates revision number which should be set + * Document_getNewRevision - calculates revision number which should be set on this document. Implementation depends on revision numbering policy which can be very different. Interaction workflow should call setNewRevision method. - Subcontent: documents may include subcontent (files, images, etc.) so that publication of rich content can be path independent. @@ -272,7 +297,17 @@ class Document(XMLObject): 'subject', 'source_reference', 'source_project_title') - ### Content indexing methods + ### Content processing methods + def index_html(self, REQUEST, RESPONSE, format=None, **kw): + """ + We follow here the standard Zope API for files and images + and extend it to support format conversion. + + format - the format specied in the form of an extension + string (ex. jpeg, html, text, txt, etc.) + """ + pass + security.declareProtected(Permissions.View, 'getSearchableText') def getSearchableText(self, md=None): """ @@ -304,47 +339,57 @@ class Document(XMLObject): SearchableText = getSearchableText # XXX-JPS - Here wa have a security issue - ask seb what to do ### Relation getters - def _getImplicitSuccessorReferenceList(self): + def getSearchableReferenceList(self): """ - Private Implementation Method + Public Method - Find references in text_content, return matches - with this we can then find objects - The reference regexp defined in Document_getFilenameParsingRegexp should - contain named groups (usually reference, version, language) - which make keys of the dictionary returned by this function - This function returns a list of dictionaries. - """ - if getattr(self,'getTextContent',_MARKER) is _MARKER: - return [] - if self.getTextContent() is None: - return [] + This method returns a list of dictionaries which can + be used to find objects by reference. It uses for + that a regular expression defined at system level + preferences. + """ + text = self.getSearchableText() + regexp = self.getPreferredReferenceLookupRegexp() try: - method = self._getTypeBasedMethod('getReferenceLookupRegexp', - fallback_script_id = 'Document_getReferenceLookupRegexp') - rx_search = method() + rx_search = re.compile(regexp) except TypeError: # no regexp in preference self.log('please set document reference regexp in preferences') return [] - res = rx_search.finditer(self.getTextContent()) + res = rx_search.finditer(text) res = [(r.group(),r.groupdict()) for r in res] return res - + security.declareProtected(Permissions.View, 'getImplicitSuccessorValueList') def getImplicitSuccessorValueList(self): """ Find objects which we are referencing (if our text_content contains - references of other documents). The actual search is delegated to - Document_findImplicitSuccessor script. We can use only complete coordinate - triplets (reference-version-language) or also partial (e.g. reference only). - Normally, Document_findImplicitSuccessor would use getLatestVersionValue to - return only the most recent/relevant version. + references of other documents). The whole implementation is delegated to + Document_getImplicitSuccessorValueList script. + + The implementation goes in 2 steps: + + - Step 1: extract with a regular expression + a list of distionaries with various parameters such as + reference, portal_type, language, version, user, etc. This + part is configured through a portal preference. + + - Step 2: read the list of dictionaries + and build a list of values by calling portal_catalog + with appropriate parameters (and if possible build + a complex query whenever this becomes available in + portal catalog) + + The script is reponsible for calling getSearchableReferenceList + so that it can use another approach if needed. + + NOTE: passing a group_by parameter may be useful at a + later stage of the implementation. """ # XXX results should be cached as volatile attributes # XXX-JPS - Please use TransactionCache in ERP5Type for this # TransactionCache does all the work for you lst = [] - for ref in self._getImplicitSuccessorReferenceList(): + for ref in self.getSearchableReferenceList(): r = ref[1] res = self.Document_findImplicitSuccessor(**r) if len(res)>0: @@ -355,12 +400,21 @@ class Document(XMLObject): def getImplicitPredecessorValueList(self): """ This function tries to find document which are referencing us - by reference only, or - by reference/language etc. - Uses customizeable script Document_findImplicitPredecessorList. - - It is mostly implementation level - depends on what parameters we use to identify - document, and on how a doc must reference me to be my predecessor (reference only, - or with a language, etc + by reference/language etc. Implementation is passed to + Document_getImplicitPredecessorValueList + + The script should proceed in two steps: + + Step 1: build a list of references out of the context + (ex. INV-123456, 123456, etc.) + + Step 2: search using the portal_catalog and use + priorities (ex. INV-123456 before 123456) + ( if possible build a complex query whenever + this becomes available in portal catalog ) + + NOTE: passing a group_by parameter may be useful at a + later stage of the implementation. """ # XXX results should be cached as volatile attributes method = self._getTypeBasedMethod('findImplicitPredecessorList', @@ -414,8 +468,7 @@ class Document(XMLObject): return lista_latest.keys() - - ### Version and language getters + ### Version and language getters - might be moved one day to a mixin class in base security.declareProtected(Permissions.View, 'getLatestVersionValue') def getLatestVersionValue(self, language=None): """ @@ -429,7 +482,7 @@ class Document(XMLObject): in any language or in the user language if the version is the same. """ - # User portal_catalog + # Use portal_catalog pass security.declareProtected(Permissions.View, 'getVersionValueList') @@ -438,7 +491,7 @@ class Document(XMLObject): Returns a list of documents with same reference, same portal_type but different version and given language or any language if not given. """ - # User portal_catalog + # Use portal_catalog pass security.declareProtected(Permissions.View, 'isVersionUnique') @@ -446,7 +499,7 @@ class Document(XMLObject): """ Returns true if no other document has the same version and language """ - # User portal_catalog + # Use portal_catalog pass security.declareProtected(Permissions.View, 'getLatestRevisionValue') @@ -454,7 +507,7 @@ class Document(XMLObject): """ Returns the latest revision of ourselves """ - # User portal_catalog + # Use portal_catalog pass security.declareProtected(Permissions.View, 'getRevisionValueList') @@ -462,7 +515,7 @@ class Document(XMLObject): """ Returns a list revision strings for a given reference, version, language """ - # User portal_catalog + # Use portal_catalog pass security.declareProtected(Permissions.ModifyPortalContent, 'setNewRevision') @@ -472,9 +525,9 @@ class Document(XMLObject): Delegates to ZMI script because revision numbering policies can be different. Should be called by interaction workflow upon appropriate action. """ - # User portal_catalog without security - method = self._getTypeBasedMethod('getNewRevisionNumber', - fallback_script_id = 'Document_getNewRevisionNumber') + # Use portal_catalog without security + method = self._getTypeBasedMethod('getNewRevision', + fallback_script_id = 'Document_getNewRevision') new_rev = method() self.setRevision(new_rev) @@ -484,7 +537,7 @@ class Document(XMLObject): Returns a list of languages which this document is available in for the current user. """ - # User portal_catalog + # Use portal_catalog pass security.declareProtected(Permissions.View, 'getOriginalLanguage') @@ -509,9 +562,10 @@ class Document(XMLObject): returns properties which should be set on the document """ if user_login is None: - user_login = self.portal_something.getUserLogin() - return self._getTypeBasedMethod('getPropertyDictFromUserLogin', + user_login = str(getSecurityManager().getUser()) + method = self._getTypeBasedMethod('getPropertyDictFromUserLogin', fallback_script_id='Document_getPropertyDictFromUserLogin') + return method() security.declareProtected(Permissions.ModifyPortalContent,'getPropertyDictFromContent') def getPropertyDictFromContent(self): @@ -519,6 +573,8 @@ class Document(XMLObject): Based on the document content, find out as many properties as needed. returns properties which should be set on the document """ + # XXX this method should first make sure we have text content + # or do a conversion return self._getTypeBasedMethod('getPropertyDictFromContent', fallback_script_id='Document_getPropertyDictFromContent') @@ -577,7 +633,7 @@ class Document(XMLObject): for order_id in order_list[0:content_index-1]: if order_id not in VALID_ORDER_KEY_LIST: # Prevent security attack or bad preferences - raise AttributeError, "explain what..." + raise AttributeError, "%s is not in valid order key list" % order_id method_id = 'getPropertyDictFrom%s' % convertToUpperCase(order_id) method = getattr(self, method_id) if order_id == 'file_name': @@ -615,7 +671,7 @@ class Document(XMLObject): for order_id in order_list[content_index:]: if order_id not in VALID_ORDER_KEY_LIST: # Prevent security attack or bad preferences - raise AttributeError, "explain what..." + raise AttributeError, "%s is not in valid order key list" % order_id method_id = 'getPropertyDictFrom%s' % convertToUpperCase(order_id) method = getattr(self, method_id) if order_id == 'file_name':