Commit f33d9b66 authored by Jérome Perrin's avatar Jérome Perrin Committed by Kazuhiko Shiozaki

DMS: handle gracefully unauthorized ingestion scenarios

Instead of using UnrestrictedMethod for the whole duration of
discoverMetadata, revert to the original user permissions for the last
part where we merge revision or change state, because we want this to
fail when user is not allowed.

This adjusts also the high level Base_contribute script to catch these
errors and report them nicely to the user and while doing this some
other problems were discovered and are also fixed.

Some problems in ContributionTool.newContent were fixed, this was
already trying to detect that a document is going to be merged and
synchronously checked that this existing document can be replaced, to
display an error to the user, but this part has two issues:
  - first it was using getMatchedFilenamePatternDict to find the
  document coordinates, but this method only supports
  preferred_document_filename_regular_expression capturing reference,
  but not the combination of node_reference and local_reference, so this
  was changed to use getPropertyDictFromFilename, which is what is
  actually used to compute the reference.
  - The second problem here was that this check was done with a
  standard restricted catalog search, but later mergeRevision uses an
  unrestricted search, so this was changed to use unrestricted catalog
  search to match mergeRevision

The testing also revealed that the PDFDocument._setFile hack to clear
the _content_information was only for _setFile, but not for _setData,
so this was extended to _setData as well, so that a case where the PDF
content is updated by setData also update content information.
parent fa4cb814
......@@ -395,4 +395,11 @@ class PDFDocument(Image):
del self._content_information
except (AttributeError, KeyError):
pass
Image._setFile(self, *args, **kw)
super(PDFDocument, self)._setFile(*args, **kw)
def _setData(self, *args, **kw):
try:
del self._content_information
except (AttributeError, KeyError):
pass
super(PDFDocument, self)._setData(*args, **kw)
......@@ -3055,6 +3055,94 @@ class TestDocumentWithSecurity(TestDocumentMixin):
user_pref.getPreferredThumbnailImageHeight()),
image.getSizeFromImageDisplay('thumbnail'))
def test_mergeRevision(self):
document1 = self.portal.portal_contributions.newContent(
file=makeFileUpload('TEST-en-002.doc'))
self.tic()
self.assertEqual(
(document1.getReference(), document1.getLanguage(), document1.getVersion()),
('TEST', 'en', '002'))
self.assertNotIn('This document is modified', document1.asText())
document2 = self.portal.portal_contributions.newContent(
file=makeFileUpload('TEST-en-002-modified.doc'))
self.tic()
self.assertIn('This document is modified', document2.asText())
self.assertEqual(
(document2.getReference(), document2.getLanguage(), document2.getVersion()),
('TEST', 'en', '002'))
# document was updated in-place
self.assertEqual(document1.getRelativeUrl(), document2.getRelativeUrl())
document2.manage_addLocalRoles(self.username, ['Assignor'])
document2.share()
self.tic()
# once the document is shared, the user can no longer edit it and trying to upload
# the same reference causes an error.
with self.assertRaisesRegex(
Unauthorized,
"You are not allowed to update the existing document"):
self.portal.portal_contributions.newContent(file=makeFileUpload('TEST-en-002.doc'))
# this also works with another user which can not see the document
another_user_id = self.id()
uf = self.portal.acl_users
uf._doAddUser(another_user_id, '', ['Author'], [])
newSecurityManager(None, uf.getUserById(another_user_id).__of__(uf))
with self.assertRaisesRegex(
Unauthorized,
"You are not allowed to update the existing document"):
self.portal.portal_contributions.newContent(file=makeFileUpload('TEST-en-002.doc'))
def test_mergeRevision_with_node_reference_local_reference_filename_regular_expression(self):
# this filename regular expression comes from configurator
self.getDefaultSystemPreference().setPreferredDocumentFilenameRegularExpression(
"(?P<node_reference>[a-zA-Z0-9_-]+)-(?P<local_reference>[a-zA-Z0-9_.]+)-(?P<version>[0-9a-zA-Z.]+)-(?P<language>[a-z]{2})[^-]*?"
)
self.tic()
document1 = self.portal.portal_contributions.newContent(
file=makeFileUpload('TEST-en-002.doc', as_name='P-PROJ-TEST-002-en.doc'))
self.tic()
self.assertEqual(
(document1.getReference(), document1.getLanguage(), document1.getVersion()),
('P-PROJ-TEST', 'en', '002'))
self.assertNotIn('This document is modified', document1.asText())
document2 = self.portal.portal_contributions.newContent(
file=makeFileUpload('TEST-en-002-modified.doc', as_name='P-PROJ-TEST-002-en.doc'))
self.tic()
self.assertIn('This document is modified', document2.asText())
self.assertEqual(
(document2.getReference(), document2.getLanguage(), document2.getVersion()),
('P-PROJ-TEST', 'en', '002'))
# document was updated in-place
self.assertEqual(document1.getRelativeUrl(), document2.getRelativeUrl())
document2.manage_addLocalRoles(self.username, ['Assignor'])
document2.share()
self.tic()
# once the document is shared, the user can no longer edit it and trying to upload
# the same reference causes an error.
with self.assertRaisesRegex(
Unauthorized,
"You are not allowed to update the existing document"):
self.portal.portal_contributions.newContent(
file=makeFileUpload('TEST-en-002.doc', as_name='P-PROJ-TEST-002-en.doc'))
# this also works with another user which can not see the document
another_user_id = self.id()
uf = self.portal.acl_users
uf._doAddUser(another_user_id, '', ['Author'], [])
newSecurityManager(None, uf.getUserById(another_user_id).__of__(uf))
with self.assertRaisesRegex(
Unauthorized,
"You are not allowed to update the existing document"):
self.portal.portal_contributions.newContent(
file=makeFileUpload('TEST-en-002.doc', as_name='P-PROJ-TEST-002-en.doc'))
class TestDocumentPerformance(TestDocumentMixin):
def test_01_LargeOOoDocumentToImageConversion(self):
......
......@@ -4,6 +4,8 @@
Use to contribute file to ERP5.
"""
from ZTUtils import make_query
from zExceptions import Unauthorized
from Products.CMFCore.WorkflowCore import WorkflowException
portal = context.getPortalObject()
translateString = portal.Base_translateString
......@@ -53,38 +55,55 @@ if attach_document_to_context:
document_kw['follow_up_list'] = follow_up_list
document_kw.update({'discover_metadata': not synchronous_metadata_discovery})
if url is not None:
# we contribute and URL, this happens entirely asynchronous
document = portal_contributions.newContentFromURL(url = url, \
repeat = max_repeat, \
batch_mode = batch_mode, \
**document_kw)
if document is None:
# portal contributions could not upload it
if cancel_url is not None:
# we can assume we can redirect
redirect_url= '%s?%s' %(cancel_url,
make_query(dict(portal_status_message=translateString("Wrong or not accessible URL address."))))
return context.REQUEST.RESPONSE.redirect(redirect_url)
else:
# contribute file
document_kw.update({'file': file})
document = portal_contributions.newContent(**document_kw)
is_existing_document_updated = False
if synchronous_metadata_discovery:
# we need to do all synchronously, in other case portal_contributions will do
# this in an activity
if document.isSupportBaseDataConversion():
document.processFile()
filename = document.getFilename()
merged_document = document.Document_convertToBaseFormatAndDiscoverMetadata(
filename=filename,
user_login=user_login,
input_parameter_dict=document_kw)
is_existing_document_updated = (merged_document!=document)
document = merged_document
try:
if url is not None:
# we contribute and URL, this happens entirely asynchronous
document = portal_contributions.newContentFromURL(url = url, \
repeat = max_repeat, \
batch_mode = batch_mode, \
**document_kw)
if document is None:
# portal contributions could not upload it
if cancel_url is not None:
# we can assume we can redirect
redirect_url= '%s?%s' %(cancel_url,
make_query(dict(portal_status_message=translateString("Wrong or not accessible URL address."))))
return context.REQUEST.RESPONSE.redirect(redirect_url)
else:
# contribute file
batch_mode = batch_mode or not (redirect_to_context or redirect_to_document or redirect_url is not None)
document_kw.update({'file': file})
document = portal_contributions.newContent(**document_kw)
is_existing_document_updated = False
if synchronous_metadata_discovery:
# we need to do all synchronously, in other case portal_contributions will do
# this in an activity
if document.isSupportBaseDataConversion():
document.processFile()
filename = document.getFilename()
merged_document = document.Document_convertToBaseFormatAndDiscoverMetadata(
filename=filename,
user_login=user_login,
input_parameter_dict=document_kw)
is_existing_document_updated = (merged_document!=document)
document = merged_document
except (Unauthorized, WorkflowException) as e:
if batch_mode:
raise
if isinstance(e, WorkflowException):
message = translateString('You are not allowed to contribute document in that state.')
else:
if 'You are not allowed to update the existing document' not in str(e):
raise
message = translateString(
'You are not allowed to update the existing document which has the same coordinates.')
return context.Base_redirect(
'view',
abort_transaction=True,
keep_items={
'portal_status_message': message,
'portal_status_level': 'error',
})
document_portal_type = document.getTranslatedPortalType()
if not is_existing_document_updated:
......@@ -94,7 +113,7 @@ else:
message = translateString('${portal_type} updated successfully.',
mapping=dict(portal_type=document_portal_type))
if redirect_to_context or redirect_to_document or redirect_url is not None:
if not batch_mode:
# this is an UI mode where script should handle HTTP redirects and is likely used
# by ERP5 form
if redirect_to_document and document is not None:
......
......@@ -120,24 +120,10 @@ class DiscoverableMixin(CachedConvertableMixin):
return {}
### Metadata disovery and ingestion methods
security.declareProtected(Permissions.ModifyPortalContent,
'discoverMetadata')
@UnrestrictedMethod
def discoverMetadata(self, filename=None, user_login=None,
input_parameter_dict=None):
"""
This is the main metadata discovery function - controls the process
of discovering data from various sources. The discovery itself is
delegated to scripts or uses preference-configurable regexps. The
method returns either self or the document which has been
merged in the discovery process.
filename - this parameter is a file name of the form "AA-BBB-CCC-223-en"
user_login - this is a login string of a person; can be None if the user is
currently logged in, then we'll get him from session
input_parameter_dict - arguments provided to Create this content by user.
"""
def _unrestrictedDiscoverMetadata(
self, filename=None, user_login=None, input_parameter_dict=None):
"""Unrestricted part of metadata discovery"""
if input_parameter_dict is None:
input_parameter_dict = {}
# Preference is made of a sequence of 'user_login', 'content', 'filename', 'input'
......@@ -189,6 +175,31 @@ class DiscoverableMixin(CachedConvertableMixin):
if portal_type != self.getPortalType():
return self.migratePortalType(portal_type)
security.declareProtected(Permissions.ModifyPortalContent,
'discoverMetadata')
def discoverMetadata(self, filename=None, user_login=None,
input_parameter_dict=None):
"""
This is the main metadata discovery function - controls the process
of discovering data from various sources. The discovery itself is
delegated to scripts or uses preference-configurable regexps. The
method returns either self or the document which has been
merged in the discovery process.
filename - this parameter is a file name of the form "AA-BBB-CCC-223-en"
user_login - this is a login string of a person; can be None if the user is
currently logged in, then we'll get him from session
input_parameter_dict - arguments provided to Create this content by user.
"""
discovered_document = self._unrestrictedDiscoverMetadata(
filename=filename, user_login=user_login, input_parameter_dict=input_parameter_dict)
if discovered_document is not None:
return discovered_document
if input_parameter_dict is None:
input_parameter_dict = {}
def maybeChangeState(document):
publication_state = input_parameter_dict.get('publication_state')
if publication_state and document.getValidationState() == 'draft':
......
......@@ -223,26 +223,27 @@ class ContributionTool(BaseTool):
return response.redirect(self.absolute_url())
return document
#
# Check if same file is already exists. if it exists, then update it.
#
property_dict = self.getMatchedFilenamePatternDict(filename)
reference = property_dict.get('reference', None)
version = property_dict.get('version', None)
language = property_dict.get('language', None)
if portal_type and reference and version and language:
portal_catalog = portal.portal_catalog
document = portal_catalog.getResultValue(portal_type=portal_type,
reference=reference,
version=version,
language=language)
if document is not None:
# document is already uploaded. So overrides file.
if not _checkPermission(Permissions.ModifyPortalContent, document):
raise Unauthorized("[DMS] You are not allowed to update the existing document which has the same coordinates (id %s)" % document.getId())
document.edit(file=kw['file'])
return document
# If we use discover metadata API, this will be during discoverMetadata and
# mergeRevision.
if discover_metadata:
property_dict = self.getPropertyDictFromFilename(filename)
reference = property_dict.get('reference', None)
version = property_dict.get('version', None)
language = property_dict.get('language', None)
if portal_type and reference and version and language:
document = portal.portal_catalog.unrestrictedGetResultValue(
portal_type=portal_type,
reference=reference,
version=version,
language=language)
if document is not None:
# document is already uploaded. So overrides file.
if not _checkPermission(Permissions.ModifyPortalContent, document):
raise Unauthorized("[DMS] You are not allowed to update the existing document which has the same coordinates (id %s)" % document.getId())
document.edit(file=kw['file'])
return document
# Temp objects use the standard newContent from Folder
if temp_object:
# For temp_object creation, use the standard method
......
......@@ -29,22 +29,25 @@
#
##############################################################################
import io
import unittest
import os
import StringIO
from cgi import FieldStorage
from lxml import etree
from AccessControl.SecurityManagement import newSecurityManager
from AccessControl import Unauthorized
from DateTime import DateTime
from Products.ERP5Type.Utils import convertToUpperCase
from Products.ERP5Type.tests.ERP5TypeTestCase import (
ERP5TypeTestCase, _getConversionServerUrlList)
from Products.CMFCore.WorkflowCore import WorkflowException
from Products.ERP5Type.tests.Sequence import SequenceList
from Products.ERP5Type.tests.utils import FileUpload, removeZODBPythonScript, \
createZODBPythonScript
from Products.ERP5OOo.OOoUtils import OOoBuilder
from Products.CMFCore.utils import getToolByName
from zExceptions import BadRequest
from zExceptions import Redirect
import ZPublisher.HTTPRequest
from unittest import expectedFailure
import urllib
......@@ -78,6 +81,15 @@ class IngestionTestCase(ERP5TypeTestCase):
'erp5_ingestion', 'erp5_ingestion_mysql_innodb_catalog',
'erp5_web', 'erp5_crm', 'erp5_dms')
def afterSetUp(self):
self.setSystemPreference()
def setSystemPreference(self):
default_pref = self.getDefaultSystemPreference()
default_pref.setPreferredRedirectToDocument(False)
default_pref.setPreferredDocumentFilenameRegularExpression(FILENAME_REGULAR_EXPRESSION)
default_pref.setPreferredDocumentReferenceRegularExpression(REFERENCE_REGULAR_EXPRESSION)
def beforeTearDown(self):
# cleanup modules
module_id_list = """web_page_module
......@@ -136,13 +148,8 @@ class TestIngestion(IngestionTestCase):
self.portal_categories = self.getCategoryTool()
self.portal_catalog = self.getCatalogTool()
self.createDefaultCategoryList()
self.setSystemPreference()
self.setSimulatedNotificationScript()
def setSystemPreference(self):
default_pref = self.getDefaultSystemPreference()
default_pref.setPreferredDocumentFilenameRegularExpression(FILENAME_REGULAR_EXPRESSION)
default_pref.setPreferredDocumentReferenceRegularExpression(REFERENCE_REGULAR_EXPRESSION)
super(TestIngestion, self).afterSetUp()
def setSimulatedNotificationScript(self, sequence=None, sequence_list=None, **kw):
"""
......@@ -2108,7 +2115,7 @@ class Base_contributeMixin:
"""
person = self.portal.person_module.newContent(portal_type='Person')
empty_file_upload = ZPublisher.HTTPRequest.FileUpload(FieldStorage(
fp=StringIO.StringIO(),
fp=io.BytesIO(),
environ=dict(REQUEST_METHOD='PUT'),
headers={"content-disposition":
"attachment; filename=empty;"}))
......@@ -2266,3 +2273,120 @@ class TestBase_contributeWithSecurity(IngestionTestCase, Base_contributeMixin):
uf._doAddUser(self.id(), self.newPassword(), ['Associate', 'Assignor', 'Author'], [])
user = uf.getUserById(self.id()).__of__(uf)
newSecurityManager(None, user)
def test_Base_contribute_mergeRevision(self):
person = self.portal.person_module.newContent(portal_type='Person')
ret = person.Base_contribute(
redirect_to_context=True,
synchronous_metadata_discovery=True,
file=makeFileUpload('TEST-en-002.pdf'))
self.assertIn(
('portal_status_message', 'PDF created successfully.'),
urlparse.parse_qsl(urlparse.urlparse(ret).query))
document, = self.portal.document_module.contentValues()
self.assertEqual(
(document.getReference(), document.getLanguage(), document.getVersion()),
('TEST', 'en', '002'))
self.assertEqual(document.getValidationState(), 'draft')
document.setData(b'')
self.tic()
# when updating, the message is different
ret = person.Base_contribute(
redirect_to_context=True,
synchronous_metadata_discovery=True,
file=makeFileUpload('TEST-en-002.pdf'))
self.assertIn(
('portal_status_message', 'PDF updated successfully.'),
urlparse.parse_qsl(urlparse.urlparse(ret).query))
document, = self.portal.document_module.contentValues()
self.assertEqual(
(document.getReference(), document.getLanguage(), document.getVersion()),
('TEST', 'en', '002'))
self.assertEqual(document.getValidationState(), 'draft')
self.assertIn(b'%PDF', document.getData())
# change to a state where user can not edit the document
document.setData(b'')
document.share()
self.tic()
for synchronous_metadata_discovery in True, False:
with self.assertRaises(Redirect) as ctx:
person.Base_contribute(
redirect_to_context=True,
synchronous_metadata_discovery=synchronous_metadata_discovery,
file=makeFileUpload('TEST-en-002.pdf'))
self.assertIn(
('portal_status_message',
'You are not allowed to update the existing document which has the same coordinates.'),
urlparse.parse_qsl(urlparse.urlparse(str(ctx.exception)).query))
self.assertIn(
('portal_status_level', 'error'),
urlparse.parse_qsl(urlparse.urlparse(str(ctx.exception)).query))
# document is not updated
self.assertEqual(document.getData(), b'')
# when using the script directly it's an error
with self.assertRaisesRegex(
Unauthorized,
"You are not allowed to update the existing document which has the same coordinates"):
person.Base_contribute(
synchronous_metadata_discovery=synchronous_metadata_discovery,
file=makeFileUpload('TEST-en-002.pdf'))
self.assertEqual(document.getData(), b'')
def test_Base_contribute_publication_state_unauthorized(self):
# When user is not allowed to publish a document, they can not use publication_state="published"
# option of Base_contribute
user_id = self.id() + '-author'
uf = self.portal.acl_users
uf._doAddUser(user_id, self.newPassword(), ['Author'], [])
user = uf.getUserById(user_id).__of__(uf)
newSecurityManager(None, user)
person = self.portal.person_module.newContent(portal_type='Person')
# with redirect_to_context option (like in the UI Dialog), we have a nice
# status message
with self.assertRaises(Redirect) as ctx:
person.Base_contribute(
publication_state='published',
redirect_to_context=True,
synchronous_metadata_discovery=True,
file=makeFileUpload('TEST-en-002.pdf'))
self.assertIn(
('portal_status_message', 'You are not allowed to contribute document in that state.'),
urlparse.parse_qsl(urlparse.urlparse(str(ctx.exception)).query))
self.assertIn(
('portal_status_level', 'error'),
urlparse.parse_qsl(urlparse.urlparse(str(ctx.exception)).query))
# when using the script directly it's an error
with self.assertRaisesRegex(
WorkflowException,
"Transition document_publication_workflow/publish unsupported"):
person.Base_contribute(
publication_state='published',
synchronous_metadata_discovery=True,
file=makeFileUpload('TEST-en-002.pdf'))
# when using asynchronous metadata discovery, an error occurs in activity,
# but not document is published
person.Base_contribute(
publication_state='published',
redirect_to_context=True,
synchronous_metadata_discovery=False,
file=makeFileUpload('TEST-en-002.pdf'))
with self.assertRaisesRegex(
Exception,
"Transition document_publication_workflow/publish unsupported"):
self.tic()
self.assertEqual(
{doc.getValidationState() for doc in self.portal.document_module.contentValues()},
set(['draft']))
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment