diff --git a/product/ERP5/Document/Document.py b/product/ERP5/Document/Document.py index d14be510fbd5b168a7c3a9cbb800a0182299459d..7ed4b06cf86b9f0e4344f1e315db1d09582fa788 100644 --- a/product/ERP5/Document/Document.py +++ b/product/ERP5/Document/Document.py @@ -758,7 +758,8 @@ class Document(DocumentExtensibleTraversableMixin, XMLObject, UrlMixin, if method is not None: method() security.declareProtected(Permissions.ModifyPortalContent, 'updateContentFromURL') - def updateContentFromURL(self, repeat=MAX_REPEAT, crawling_depth=0): + def updateContentFromURL(self, repeat=MAX_REPEAT, crawling_depth=0, + repeat_interval=1, batch_mode=True): """ Download and update content of this document from its source URL. Implementation is handled by ContributionTool. diff --git a/product/ERP5/Tool/ContributionTool.py b/product/ERP5/Tool/ContributionTool.py index acd964f78885849fb86fe3688d93487e0cca31c6..b270b76a517cb68375792e23355f47f7bd8e9fa4 100644 --- a/product/ERP5/Tool/ContributionTool.py +++ b/product/ERP5/Tool/ContributionTool.py @@ -541,7 +541,8 @@ class ContributionTool(BaseTool): url_registry_tool.registerURL(url, None, context=container) security.declareProtected(Permissions.AddPortalContent, 'updateContentFromURL') - def updateContentFromURL(self, content, repeat=MAX_REPEAT, crawling_depth=0): + def updateContentFromURL(self, content, repeat=MAX_REPEAT, crawling_depth=0, + repeat_interval=1, batch_mode=True): """ Updates an existing content. 
""" @@ -555,20 +556,11 @@ class ContributionTool(BaseTool): try: url = content.asURL() file_object, filename, content_type = self._openURL(url) - except urllib2.HTTPError, error: - if repeat == 0: - # XXX - Call the extendBadURLList method,--NOT Implemented-- - # IDEA : ajouter l'url en question dans une list "bad_url_list" puis lors du crawling au lieu que de boucler sur - # la liste des url extraites de la page web on fait un test supplementaire qui verifie que l'url n'est pas - # dans la liste bad_url_lis - raise - content.activate(at_date=DateTime() + 1).updateContentFromURL(repeat=repeat - 1) - return except urllib2.URLError, error: - if repeat == 0: + if repeat == 0 or not batch_mode: # XXX - Call the extendBadURLList method,--NOT Implemented-- raise - content.activate(at_date=DateTime() + 1).updateContentFromURL(repeat=repeat - 1) + content.activate(at_date=DateTime() + repeat_interval).updateContentFromURL(repeat=repeat - 1) return content._edit(file=file_object, content_type=content_type) diff --git a/product/ERP5/interfaces/uploadable.py b/product/ERP5/interfaces/uploadable.py index 978dc5a750a7ede7a4539e35f28b666f8f1af7ca..169188513848cdf10fa8d29705f1e728555a45b0 100644 --- a/product/ERP5/interfaces/uploadable.py +++ b/product/ERP5/interfaces/uploadable.py @@ -94,7 +94,8 @@ class IUploadable(Interface): passed to IConvertable.convert or to IDownloadable.index_html """ - def updateContentFromURL(url=None, repeat=MAX_REPEAT, crawling_depth=0): + def updateContentFromURL(url=None, repeat=MAX_REPEAT, crawling_depth=0, + repeat_interval=1, batch_mode=True): """ Download and update content of this document from the specified URL. If no url is specified, Document which support the IUrlGetter @@ -103,11 +104,15 @@ class IUploadable(Interface): url -- optional URL to download the updated content from. 
required whenever document does not implement IUrlGetter - repeat -- optional max number of retries for download - crawling_depth -- optional crawling depth for documents which implement ICrawlable + repeat -- optional max number of retries for download + + repeat_interval -- optional delay between retries, added to DateTime() (i.e. in days) + + batch_mode -- optional; pass False when called from a user interface so that a download error is raised immediately instead of scheduling a retry + NOTE: implementation is normally delegated to ContributionTool. XXX - it is unclear whether MAX_REPEAT should be part of signature