Commit 7a041f86 authored by Tatuya Kamada's avatar Tatuya Kamada

patches/Restricted: Allow numpy and pandas

Most parts of this patch come from wendelin, plus the tests and some fixes.

See merge request nexedi/erp5!1252
parents 602b041e a254bf50
Pipeline #11423 failed with stage
in 0 seconds
...@@ -35,8 +35,9 @@ from Products.ERP5Type.tests.ERP5TypeTestCase import ERP5TypeTestCase ...@@ -35,8 +35,9 @@ from Products.ERP5Type.tests.ERP5TypeTestCase import ERP5TypeTestCase
from Products.ERP5Type.tests.utils import createZODBPythonScript from Products.ERP5Type.tests.utils import createZODBPythonScript
from Products.ERP5Type.tests.utils import removeZODBPythonScript from Products.ERP5Type.tests.utils import removeZODBPythonScript
from Products.ERP5Type.patches.Restricted import allow_class_attribute from Products.ERP5Type.patches.Restricted import allow_class_attribute
from Products.ERP5Type.patches.Restricted import (pandas_black_list, dataframe_black_list, series_black_list)
from AccessControl import Unauthorized from AccessControl import Unauthorized
from AccessControl.ZopeGuards import Unauthorized as ZopeGuardsUnauthorized
class TestRestrictedPythonSecurity(ERP5TypeTestCase): class TestRestrictedPythonSecurity(ERP5TypeTestCase):
""" """
...@@ -453,6 +454,168 @@ class TestRestrictedPythonSecurity(ERP5TypeTestCase): ...@@ -453,6 +454,168 @@ class TestRestrictedPythonSecurity(ERP5TypeTestCase):
expected="ok" expected="ok"
) )
def testNumpy(self):
  # A restricted script imports numpy and reads attributes of a dtype
  # and of a timedelta64 scalar; the returned list must match.
  script = textwrap.dedent('''
    import numpy as np
    return [x for x in (np.dtype('int32').name, np.timedelta64(1, 'D').nbytes)]
  ''')
  self.createAndRunScript(script, expected=["int32", 8])
def testNdarrayWrite(self):
  # Item assignment on a numpy array from a restricted script: the
  # script overwrites z[0][0] and returns the value it reads back.
  script = textwrap.dedent('''
    import numpy as np
    z = np.array([[1,2],[3,4]])
    z[0][0] = 99
    return z[0][0]
  ''')
  self.createAndRunScript(script, expected=99)
def testPandasSeries(self):
  # A restricted script builds a pandas Series and converts it back
  # to a plain list.
  script = textwrap.dedent('''
    import pandas as pd
    return pd.Series([1,2,3]).tolist()
  ''')
  self.createAndRunScript(script, expected=[1,2,3])
def testPandasTimestamp(self):
  # A restricted script creates a pandas Timestamp and reads its
  # `year` attribute.
  script = textwrap.dedent('''
    import pandas as pd
    return pd.Timestamp('2020-01').year
  ''')
  self.createAndRunScript(script, expected=2020)
def testPandasDatetimeIndex(self):
  # The script converts a column with pd.to_datetime, makes it the
  # index in place, and returns the name of the resulting index.
  script = textwrap.dedent('''
    import pandas as pd
    df = pd.DataFrame({'date':['2020-01-01','2020-03-01']})
    df['date'] = pd.to_datetime(df['date'])
    df.set_index('date', inplace=True)
    return str(df.index.name)
  ''')
  self.createAndRunScript(script, expected='date')
def testPandasMultiIndex(self):
  # Indexing a frame by two columns and reading the names of the
  # resulting index from a restricted script.
  script = textwrap.dedent('''
    import pandas as pd
    df = pd.DataFrame({'a':[1,2],'b':[3,4],'c':[5,6]})
    df2 = df.set_index(['a','b'],drop=True)
    return list(df2.index.names)
  ''')
  self.createAndRunScript(script, expected=['a','b'])
def testPandasIndex(self):
  # Same as the multi-column case but with a single index column.
  script = textwrap.dedent('''
    import pandas as pd
    df = pd.DataFrame({'a':[1,2],'b':[3,4]})
    df2 = df.set_index(['a'],drop=True)
    return list(df2.index.names)
  ''')
  self.createAndRunScript(script, expected=['a'])
def testPandasGroupBy(self):
  # test pandas.core.groupby.DataFrameGroupBy,SeriesGroupBy
  # The script groups by 'id', selects 'quantity' and sums per group.
  script = textwrap.dedent('''
    import pandas as pd
    df2 = pd.DataFrame({'id':[1,1,2],'quantity':[3,4,5],'price':[6,7,8]})
    return list(df2.groupby(['id'])['quantity'].agg('sum'))
  ''')
  self.createAndRunScript(script, expected=[7,5])
def testPandasLocIndexer(self):
  # Boolean-mask selection through `.loc`, then column and item access,
  # all performed inside a restricted script.
  script = textwrap.dedent('''
    import pandas as pd
    df = pd.DataFrame({'a':[1,2],'b':[3,4]})
    return df.loc[df['a'] == 1]['b'][0]
  ''')
  self.createAndRunScript(script, expected=3)
def testPandasDataFrameWrite(self):
  # Writing a cell through `.iloc` from a restricted script and reading
  # the new value back through regular column access.
  script = textwrap.dedent('''
    import pandas as pd
    df = pd.DataFrame({'a':[1,2], 'b':[3,4]})
    df.iloc[0, 0] = 999
    return df['a'][0]
  ''')
  self.createAndRunScript(script, expected=999)
def testPandasIORead(self):
  # Calling pd.read_csv from a restricted script must raise Unauthorized.
  csv_script = textwrap.dedent('''
    import pandas as pd
    pd.read_csv('testPandasIORead.csv')
  ''')
  self.assertRaises(Unauthorized, self.createAndRunScript, csv_script)

  # Test the black_list configuration validity
  # Every name listed in pandas_black_list is substituted into the same
  # script template and must be refused as well.
  template = '''
    import pandas as pd
    read_method = pd.{read_method}
    read_method('testPandasIORead.data')
  '''
  for name in pandas_black_list:
    script = textwrap.dedent(template.format(read_method=name))
    self.assertRaises(Unauthorized, self.createAndRunScript, script)
def testPandasDataFrameIOWrite(self):
  # DataFrame.to_csv from a restricted script must raise the
  # Unauthorized exception imported from AccessControl.ZopeGuards.
  csv_script = textwrap.dedent('''
    import pandas as pd
    df = pd.DataFrame({'a':[1,2,3]})
    df.to_csv('testPandasDataFrameIOWrite.csv')
  ''')
  self.assertRaises(
    ZopeGuardsUnauthorized, self.createAndRunScript, csv_script)

  # Test the black_list configuration validity
  # Each name in dataframe_black_list is looked up on a DataFrame and
  # called; every one of them must be refused.
  template = '''
    import pandas as pd
    df = pd.DataFrame(columns=['a','b'],data=[[1,2]])
    write_method = df.{write_method}
    write_method('testPandasDataFrameIOWrite.data')
  '''
  for name in dataframe_black_list:
    script = textwrap.dedent(template.format(write_method=name))
    self.assertRaises(
      ZopeGuardsUnauthorized, self.createAndRunScript, script)
def testPandasSeriesIOWrite(self):
  # Series.to_csv from a restricted script must raise the
  # Unauthorized exception imported from AccessControl.ZopeGuards.
  csv_script = textwrap.dedent('''
    import pandas as pd
    df = pd.Series([4,5,6])
    df.to_csv('testPandasSeriesIOWrite.csv')
  ''')
  self.assertRaises(
    ZopeGuardsUnauthorized, self.createAndRunScript, csv_script)

  # Test the black_list configuration validity
  # Each name in series_black_list is looked up on a Series and called;
  # every one of them must be refused.
  template = '''
    import pandas as pd
    df = pd.Series([4,5,6])
    write_method = df.{write_method}
    write_method('testPandasSeriesIOWrite.data')
  '''
  for name in series_black_list:
    script = textwrap.dedent(template.format(write_method=name))
    self.assertRaises(
      ZopeGuardsUnauthorized, self.createAndRunScript, script)
def test_suite(): def test_suite():
suite = unittest.TestSuite() suite = unittest.TestSuite()
......
...@@ -394,3 +394,84 @@ del member_id, member ...@@ -394,3 +394,84 @@ del member_id, member
from random import SystemRandom from random import SystemRandom
allow_type(SystemRandom) allow_type(SystemRandom)
ModuleSecurityInfo('os').declarePublic('urandom') ModuleSecurityInfo('os').declarePublic('urandom')
#
# backport from wendelin
#
# we need to allow access to numpy's internal types
import pandas as pd
import numpy as np
allow_module('numpy')
allow_module('numpy.lib.recfunctions')
# For every supported dtype, build a tiny sample of each array flavour
# (plain ndarray, structured ndarray, record array) and pass both the
# element type and the container type to allow_type. The types are taken
# from live samples rather than being named directly.
for dtype in ('int8', 'int16', 'int32', 'int64',
              'uint8', 'uint16', 'uint32', 'uint64',
              'float16', 'float32', 'float64',
              'complex64', 'complex128'):
  z = np.array([0,], dtype=dtype)
  allow_type(type(z[0]))
  allow_type(type(z))

  sz = np.array([(0,)], dtype=[('f0', dtype)])
  allow_type(type(sz[0]))
  allow_type(type(sz))

  rz = np.rec.array(np.array([(0,)], dtype=[('f0', dtype)]))
  allow_type(type(rz[0]))
  allow_type(type(rz))

allow_type(np.timedelta64)
allow_type(type(np.c_))
allow_type(type(np.dtype('int16')))

allow_module('pandas')
allow_type(pd.Series)
allow_type(pd.Timestamp)
allow_type(pd.DatetimeIndex)
# XXX: pd.DataFrame has its own security thus disable until we can fully integrate it
#allow_type(pd.DataFrame)
allow_type(pd.MultiIndex)
# Use the public top-level aliases instead of the internal
# `pd.indexes.*` module paths: they name the same classes, and the
# internal package layout is not stable across pandas releases.
allow_type(pd.RangeIndex)
allow_type(pd.Int64Index)
allow_type(pd.core.groupby.DataFrameGroupBy)
allow_type(pd.core.groupby.SeriesGroupBy)
allow_class(pd.DataFrame)
def restrictedMethod(s,name):
  # Return a stand-in callable for the attribute called `name`.
  # Whatever positional or keyword arguments the stand-in receives, it
  # raises Unauthorized(name). `s` is accepted but not used here.
  def refuse(*_args, **_kwargs):
    raise Unauthorized(name)
  return refuse
# Note: These black_list methods are for pandas 0.19.2
# Series methods that write data out. Each name is mapped to
# restrictedMethod, and the mapping is handed to _check_access_wrapper
# to build the container assertion for pd.Series.
series_black_list = ['to_csv', 'to_json', 'to_pickle', 'to_hdf',
                     'to_sql', 'to_msgpack']
series_black_list_dict = {m: restrictedMethod for m in series_black_list}
ContainerAssertions[pd.Series] = _check_access_wrapper(pd.Series,
                                                       series_black_list_dict)

# Module-level readers declared private on the 'pandas' module.
# 'read_table' is the same file/URL reader as 'read_csv' with another
# default separator, and the 'read_sql*' functions read from databases,
# so they must be refused together with the other readers.
pandas_black_list = ['read_csv', 'read_table', 'read_json', 'read_pickle',
                     'read_hdf', 'read_fwf',
                     'read_excel', 'read_html', 'read_msgpack',
                     'read_gbq', 'read_sas', 'read_stata',
                     'read_sql', 'read_sql_query', 'read_sql_table']
ModuleSecurityInfo('pandas').declarePrivate(*pandas_black_list)

# DataFrame methods that write data out, handled like the Series ones.
dataframe_black_list = ['to_csv', 'to_json', 'to_pickle', 'to_hdf',
                        'to_excel', 'to_html', 'to_sql', 'to_msgpack',
                        'to_latex', 'to_gbq', 'to_stata']
dataframe_black_list_dict = {m: restrictedMethod for m in dataframe_black_list}
ContainerAssertions[pd.DataFrame] = _check_access_wrapper(
  pd.DataFrame, dataframe_black_list_dict)
# Modify 'safetype' dict in full_write_guard function
# of RestrictedPython (closure) directly to allow
# write access to ndarray and pandas DataFrame.
from RestrictedPython.Guards import full_write_guard
# Closure cell 1 of the guard exposes the 'safetype' dict through
# `__self__`. Look it up once instead of once per registered type.
# NOTE(review): the cell index depends on RestrictedPython internals;
# re-check it when upgrading RestrictedPython.
_write_guard_safetype = full_write_guard.func_closure[1].cell_contents.__self__
for _writable_type in (
    np.ndarray,
    np.core.records.recarray,
    np.core.records.record,
    pd.DataFrame,
    pd.Series,
    # Public alias of the class; avoids the internal `pd.tseries.index`
    # module path.
    pd.DatetimeIndex,
    pd.core.indexing._iLocIndexer,
    pd.core.indexing._LocIndexer,
    pd.MultiIndex,
    pd.Index,
):
  _write_guard_safetype[_writable_type] = True
# Do not leave the temporaries in the module namespace.
del _write_guard_safetype, _writable_type
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment