Commit 23e06437 authored by Douglas

erp5 kernel: improved detection of variables from user context that cannot be put in the zodb

Variables are investigated, recursively in case of container objects (like lists, for example),
to detect if they can be stored in the ZODB.
In this investigation persistent objects are identified by being an instance of the object
class and implementing a `__getstate__` method that raises no exception. If the variable is
not a Persistent object then we try to pickle and load it.

While developing the pickleable object identification, a complication was found. It seems that
the code cannot catch cPickle.PicklingError in the usual way, `except cPickle.PicklingError`.
This is a consequence of some weirdness regarding the exception classes of the pickle/cPickle modules;
more about it can be read at http://bugs.python.org/issue1457119. So, the workaround for this complication
was to catch all exceptions and check the exception class name as a string.

The whole check for ZODB persistence was moved into a utility function for the sake of readability
and code maintenance.

The Base_executeJupyter script object was transformed into an extension to be able to properly handle
transaction errors and render them correctly inside Jupyter.
parent 11eb5168
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from cStringIO import StringIO from cStringIO import StringIO
import cPickle
from erp5.portal_type import Image from erp5.portal_type import Image
from types import ModuleType from types import ModuleType
from ZODB.serialize import ObjectWriter from ZODB.serialize import ObjectWriter
import sys import sys
import traceback import traceback
import ast import ast
import base64 import base64
import cPickle import json
import transaction import transaction
import Acquisition import Acquisition
...@@ -20,6 +22,109 @@ from IPython.core.display import DisplayObject ...@@ -20,6 +22,109 @@ from IPython.core.display import DisplayObject
from IPython.lib.display import IFrame from IPython.lib.display import IFrame
def Base_executeJupyter(self, python_expression=None, reference=None, title=None, request_reference=False, **kw):
  """
  Execute `python_expression` inside the Data Notebook identified by
  `reference` and return the outcome for the Jupyter frontend.

  Depending on the arguments, the returned value is:
    - a plain message string when the user is not authorized or when no
      reference was given;
    - a list of {'reference': ..., 'title': ...} dictionaries, one per
      Data Notebook, when request_reference is the string 'True';
    - otherwise a JSON string holding the keys code_result, ename, evalue,
      traceback, status and mime_type.
  """
  # Only users holding 'Manage Portal' on portal_components may run code.
  if not self.Base_checkPermission('portal_components', 'Manage Portal'):
    return "You are not authorized to access the script"

  # request_reference arrives as a string; map it to its respective boolean
  # value, anything unknown being considered False.
  request_reference = {'True': True, 'False': False}.get(request_reference, False)

  # For request_reference=True, only describe the existing notebooks.
  if request_reference:
    notebook_detail_list = []
    for notebook in self.portal_catalog(portal_type='Data Notebook'):
      notebook_detail_list.append(
        {'reference': notebook.getReference(), 'title': notebook.getTitle()})
    return notebook_detail_list

  if not reference:
    return "Please set or use reference for the notebook you want to use"

  # The Jupyter frontend may send no code at all: treat it as empty code.
  python_expression = python_expression or ''

  # Look up the Data Notebook having this reference, creating it when there
  # is none yet.
  data_notebook = self.portal_catalog.getResultValue(
    portal_type='Data Notebook',
    reference=reference,
  )
  if not data_notebook:
    data_notebook = self.getDefaultModule(
      portal_type='Data Notebook',
    ).DataNotebookModule_addDataNotebook(
      title=title,
      reference=reference,
      batch_mode=True,
    )

  # Every execution is recorded as a new Data Notebook Line.
  data_notebook_line = data_notebook.DataNotebook_addDataNotebookLine(
    notebook_code=python_expression,
    batch_mode=True,
  )

  # Context associated to the notebook, or a fresh one if it has none yet.
  notebook_context = (
    data_notebook.getNotebookContext() or self.Base_createNotebookContext())

  # Delegate the execution itself; a dict describing the outcome is returned.
  execution_result = Base_runJupyterCode(self, python_expression, notebook_context)
  new_notebook_context = execution_result['notebook_context']
  result = {
    u'code_result': execution_result['result_string'],
    u'ename': execution_result['ename'],
    u'evalue': execution_result['evalue'],
    u'traceback': execution_result['traceback'],
    u'status': execution_result['status'],
    u'mime_type': execution_result['mime_type'],
  }

  # Store in the notebook the context resulting from the code execution.
  data_notebook.setNotebookContext(new_notebook_context)

  # The context may still hold variables that cannot be serialized into the
  # ZODB and that were not detected earlier, so the commit is attempted right
  # away. On failure the transaction is aborted and the user is warned.
  # Unfortunately, the exception raised in this case does not tell which
  # object caused the problem, so the user cannot be told what to fix.
  try:
    transaction.commit()
  except transaction.interfaces.TransactionError as error:
    transaction.abort()
    result.update(getErrorMessageForException(self, error, new_notebook_context))
    return json.dumps(result)

  # Serializing the result for the Jupyter frontend may fail as well; in that
  # case an 'error' status with no code_result is sent instead.
  try:
    serialized_result = json.dumps(result)
  except UnicodeDecodeError:
    result = {
      u'code_result': None,
      u'ename': u'UnicodeDecodeError',
      u'evalue': None,
      u'traceback': None,
      u'status': u'error',
      u'mime_type': result['mime_type'],
    }
    serialized_result = json.dumps(result)

  data_notebook_line.edit(
    notebook_code_result=result['code_result'],
    mime_type=result['mime_type'],
  )
  return serialized_result
def Base_runJupyterCode(self, jupyter_code, old_notebook_context): def Base_runJupyterCode(self, jupyter_code, old_notebook_context):
""" """
Function to execute jupyter code and update the context dictionary. Function to execute jupyter code and update the context dictionary.
...@@ -186,7 +291,7 @@ def Base_runJupyterCode(self, jupyter_code, old_notebook_context): ...@@ -186,7 +291,7 @@ def Base_runJupyterCode(self, jupyter_code, old_notebook_context):
code = compile(value['code'], '<string>', 'exec') code = compile(value['code'], '<string>', 'exec')
exec(code, user_context, user_context) exec(code, user_context, user_context)
# An error happened, so we show the user the stacktrace along with a # An error happened, so we show the user the stacktrace along with a
# note that the exception happened in a setup funtion's code. # note that the exception happened in a setup function's code.
except Exception as e: except Exception as e:
if value['func_name'] in user_context: if value['func_name'] in user_context:
del user_context[value['func_name']] del user_context[value['func_name']]
...@@ -261,53 +366,30 @@ def Base_runJupyterCode(self, jupyter_code, old_notebook_context): ...@@ -261,53 +366,30 @@ def Base_runJupyterCode(self, jupyter_code, old_notebook_context):
mime_type = display_data['mime_type'] or mime_type mime_type = display_data['mime_type'] or mime_type
result_string += "\n".join(removed_setup_message_list) + result.getvalue() + display_data['result'] result_string += "\n".join(removed_setup_message_list) + result.getvalue() + display_data['result']
# Checking in the user context what variables are pickleable and we can store # Saves a list of all the variables we injected into the user context and
# safely. Everything that is not pickleable shall not be stored and the user # shall be deleted before saving the context.
# needs to be warned about it.
volatile_variable_list = current_setup_dict.keys() + inject_variable_dict.keys() + user_context['_volatile_variable_list'] volatile_variable_list = current_setup_dict.keys() + inject_variable_dict.keys() + user_context['_volatile_variable_list']
del user_context['_volatile_variable_list'] volatile_variable_list.append('__builtins__')
for key, val in user_context.items(): for key, val in user_context.items():
if not key in globals_dict.keys() and not isinstance(val, ModuleType) and not key in volatile_variable_list: if not key in globals_dict.keys() and not isinstance(val, ModuleType) and not key in volatile_variable_list:
can_store = False if canSerialize(val):
# Try to check if we can serialize the object in a way which it can be
# stored properly in the ZODB
try:
# Need to unwrap the variable, otherwise we get a TypeError, because
# objects cannot be pickled while inside an acquisition wrapper.
ObjectWriter(val).serialize(Acquisition.aq_base(val))
can_store = True
# If cannot serialize object with ZODB.serialize, try with cPickle
except:
try:
# Only a dump of the object is not enough. Dumping and trying to
# load it will properly raise errors in all possible situations,
# for example: if the user defines a dict with an object of a class
# that he created the dump will stil work, but the load will fail.
cPickle.loads(cPickle.dumps(val))
can_store = True
except:
can_store = False
if can_store:
notebook_context['variables'][key] = val notebook_context['variables'][key] = val
else: else:
del user_context[key] del user_context[key]
result_string += ( result_string += (
"Cannot pickle the variable named %s whose value is %s, " "Cannot serialize the variable named %s whose value is %s, "
"thus it will not be stored in the context. " "thus it will not be stored in the context. "
"You should move it's definition to a function and " "You should move it's definition to a function and "
"use the environment object to load it.\n" "use the environment object to load it.\n"
) % (key, val) ) % (key, val)
# if isinstance(val, InstanceType):
# can_pickle = False
# Deleting from the variable storage the keys that are not in the user # Deleting from the variable storage the keys that are not in the user
# context anymore (i.e., variables that are deleted by the user). # context anymore (i.e., variables that are deleted by the user).
for key in notebook_context['variables'].keys(): for key in notebook_context['variables'].keys():
if not key in user_context: if not key in user_context:
del notebook_context['variables'][key] del notebook_context['variables'][key]
result = { result = {
'result_string': result_string, 'result_string': result_string,
'notebook_context': notebook_context, 'notebook_context': notebook_context,
...@@ -318,6 +400,63 @@ def Base_runJupyterCode(self, jupyter_code, old_notebook_context): ...@@ -318,6 +400,63 @@ def Base_runJupyterCode(self, jupyter_code, old_notebook_context):
'traceback': tb_list, 'traceback': tb_list,
} }
return result return result
def canSerialize(obj, _visited_id_set=None):
  """
  Tell whether `obj` can be stored as part of the notebook context.

  - Builtin containers (list, tuple, dict, set, frozenset) are accepted only
    if every element (and, for dicts, every key) is accepted.
  - Objects exposing `__getstate__` are checked with ZODB's ObjectWriter.
  - Anything else must survive a cPickle dump followed by a load.

  `_visited_id_set` is internal: it carries the ids of the containers that
  were already entered during the current check, so that self-referencing
  containers do not recurse forever. Callers should not pass it.

  Returns True or False. Exceptions raised by cPickle whose class name is not
  one of PicklingError, TypeError, NameError or AttributeError are propagated
  to the caller.
  """
  if _visited_id_set is None:
    _visited_id_set = set()

  container_type_tuple = (list, tuple, dict, set, frozenset)
  # If the object is a container, we need to check its elements for presence
  # of objects that cannot be put inside the ZODB.
  if isinstance(obj, container_type_tuple):
    # A container met a second time is either being checked right now
    # (reference cycle) or was already fully accepted: any rejection returns
    # False all the way up immediately, so it never gets revisited.
    if id(obj) in _visited_id_set:
      return True
    _visited_id_set.add(id(obj))
    if isinstance(obj, dict):
      for key, value in obj.iteritems():
        if not canSerialize(key, _visited_id_set):
          return False
        if not canSerialize(value, _visited_id_set):
          return False
      return True
    return all(canSerialize(element, _visited_id_set) for element in obj)

  # If the object implements __getstate__, ZODB.serialize can check whether
  # we can store it.
  if hasattr(obj, '__getstate__'):
    # Need to unwrap the variable, otherwise we get a TypeError, because
    # objects cannot be pickled while inside an acquisition wrapper.
    unwrapped_obj = Acquisition.aq_base(obj)
    writer = ObjectWriter(unwrapped_obj)
    # NOTE(review): iterating the writer is expected to yield the object
    # itself plus the new objects discovered while serializing - confirm
    # against the ZODB.serialize documentation.
    for pending_object in writer:
      try:
        writer.serialize(pending_object)
      # writer.serialize relies on the __getstate__ implementation of the
      # object, so any kind of error can happen here. Exception is caught
      # rather than everything, so that KeyboardInterrupt and SystemExit
      # are not swallowed.
      except Exception:
        return False
    return True

  # Not checkable with ZODB.serialize: fall back to cPickle.
  # Only a dump of the object is not enough. Dumping and trying to load it
  # will properly raise errors in all possible situations, for example: if
  # the user defines a dict with an object of a class that he created, the
  # dump will still work, but the load will fail.
  try:
    cPickle.loads(cPickle.dumps(obj))
  # For unknown reasons, trying to catch cPickle.PicklingError in the
  # "normal" way isn't working. This might be related to some weirdness in
  # pickle/cPickle that is reported at http://bugs.python.org/issue1457119.
  #
  # So, as a temporary fix, the exception's class name is inspected as a
  # string to identify it. This quickfix should be rewritten in a better way
  # as soon as possible.
  except Exception as e:
    if type(e).__name__ in ('PicklingError', 'TypeError', 'NameError', 'AttributeError'):
      return False
    # Bare raise keeps the original traceback, unlike "raise e".
    raise
  return True
class EnvironmentParser(ast.NodeTransformer): class EnvironmentParser(ast.NodeTransformer):
......
"""
Python script to create Data Notebook or update existing Data Notebooks
identifying notebook by reference from user.
Expected behaviour from this script:-
1. Return unauthorized message for non-developer user.
2. Create new 'Data Notebook' for new reference.
3. Add new 'Data Notebook Line'to the existing Data Notebook on basis of reference.
4. Return python dictionary containing list of all notebooks for 'request_reference=True'
"""
portal = context.getPortalObject()
# Check permissions for current user and display message to non-authorized user
if not portal.Base_checkPermission('portal_components', 'Manage Portal'):
return "You are not authorized to access the script"
import json
# Convert the request_reference argument string to their respeced boolean values
request_reference = {'True': True, 'False': False}.get(request_reference, False)
# Return python dictionary with title and reference of all notebooks
# for request_reference=True
if request_reference:
data_notebook_list = portal.portal_catalog(portal_type='Data Notebook')
notebook_detail_list = [{'reference': obj.getReference(), 'title': obj.getTitle()} for obj in data_notebook_list]
return notebook_detail_list
if not reference:
message = "Please set or use reference for the notebook you want to use"
return message
# Take python_expression as '' for empty code from jupyter frontend
if not python_expression:
python_expression = ''
# Get Data Notebook with the specific reference
data_notebook = portal.portal_catalog.getResultValue(portal_type='Data Notebook',
reference=reference)
# Create new Data Notebook if reference doesn't match with any from existing ones
if not data_notebook:
notebook_module = portal.getDefaultModule(portal_type='Data Notebook')
data_notebook = notebook_module.DataNotebookModule_addDataNotebook(
title=title,
reference=reference,
batch_mode=True
)
# Add new Data Notebook Line to the Data Notebook
data_notebook_line = data_notebook.DataNotebook_addDataNotebookLine(
notebook_code=python_expression,
batch_mode=True
)
# Gets the context associated to the data notebook being used
#
old_notebook_context = data_notebook.getNotebookContext()
if not old_notebook_context:
old_notebook_context = portal.Base_createNotebookContext()
# Pass all to code Base_runJupyter external function which would execute the code
# and returns a dict of result
final_result = context.Base_runJupyter(python_expression, old_notebook_context)
code_result = final_result['result_string']
new_local_variable_dict = final_result['notebook_context']
ename = final_result['ename']
evalue = final_result['evalue']
traceback = final_result['traceback']
status = final_result['status']
mime_type = final_result['mime_type']
# Updates the context in the notebook with the resulting context of code
# execution.
#
try:
data_notebook.setNotebookContext(new_local_variable_dict)
except Exception as e:
return context.Base_getErrorMessageForException(e, new_local_variable_dict)
result = {
u'code_result': code_result,
u'ename': ename,
u'evalue': evalue,
u'traceback': traceback,
u'status': status,
u'mime_type': mime_type
}
# Catch exception while seriaizing the result to be passed to jupyter frontend
# and in case of error put code_result as None and status as 'error' which would
# be shown by Jupyter frontend
try:
serialized_result = json.dumps(result)
except UnicodeDecodeError:
result = {
u'code_result': None,
u'ename': u'UnicodeDecodeError',
u'evalue': None,
u'traceback': None,
u'status': u'error',
u'mime_type': mime_type
}
serialized_result = json.dumps(result)
data_notebook_line.edit(notebook_code_result=code_result, mime_type=mime_type)
return serialized_result
...@@ -2,71 +2,26 @@ ...@@ -2,71 +2,26 @@
<ZopeData> <ZopeData>
<record id="1" aka="AAAAAAAAAAE="> <record id="1" aka="AAAAAAAAAAE=">
<pickle> <pickle>
<global name="PythonScript" module="Products.PythonScripts.PythonScript"/> <global name="ExternalMethod" module="Products.ExternalMethod.ExternalMethod"/>
</pickle> </pickle>
<pickle> <pickle>
<dictionary> <dictionary>
<item> <item>
<key> <string>Script_magic</string> </key> <key> <string>_function</string> </key>
<value> <int>3</int> </value> <value> <string>Base_executeJupyter</string> </value>
</item>
<item>
<key> <string>_Access_contents_information_Permission</string> </key>
<value>
<tuple>
<string>Authenticated</string>
<string>Author</string>
<string>Manager</string>
<string>Owner</string>
</tuple>
</value>
</item>
<item>
<key> <string>_bind_names</string> </key>
<value>
<object>
<klass>
<global name="NameAssignments" module="Shared.DC.Scripts.Bindings"/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key> <string>_asgns</string> </key>
<value>
<dictionary>
<item>
<key> <string>name_container</string> </key>
<value> <string>container</string> </value>
</item>
<item>
<key> <string>name_context</string> </key>
<value> <string>context</string> </value>
</item>
<item>
<key> <string>name_m_self</string> </key>
<value> <string>script</string> </value>
</item>
<item>
<key> <string>name_subpath</string> </key>
<value> <string>traverse_subpath</string> </value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item> </item>
<item> <item>
<key> <string>_params</string> </key> <key> <string>_module</string> </key>
<value> <string>python_expression=None, reference=None, title=None, request_reference=False, **kw</string> </value> <value> <string>JupyterCompile</string> </value>
</item> </item>
<item> <item>
<key> <string>id</string> </key> <key> <string>id</string> </key>
<value> <string>Base_executeJupyter</string> </value> <value> <string>Base_executeJupyter</string> </value>
</item> </item>
<item>
<key> <string>title</string> </key>
<value> <string></string> </value>
</item>
</dictionary> </dictionary>
</pickle> </pickle>
</record> </record>
......
...@@ -150,9 +150,9 @@ portal.%s() ...@@ -150,9 +150,9 @@ portal.%s()
portal = self.portal portal = self.portal
self.login('member_user') self.login('member_user')
result = portal.Base_executeJupyter.Base_checkPermission('portal_components', 'Manage Portal') result = portal.Base_executeJupyter(title='Any title', reference='Any reference')
self.assertFalse(result) self.assertEquals(result, 'You are not authorized to access the script')
def testUserCanCreateNotebookWithoutCode(self): def testUserCanCreateNotebookWithoutCode(self):
""" """
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment