Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
erp5
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Titouan Soulard
erp5
Commits
1cbff971
Commit
1cbff971
authored
May 19, 2022
by
Levin Zimmermann
Browse files
Options
Browse Files
Download
Plain Diff
Allow patched pandas.read_* in restricted Python
See merge request
nexedi/erp5!1615
parents
85317472
4360dbc6
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
233 additions
and
16 deletions
+233
-16
bt5/erp5_core_test/TestTemplateItem/portal_components/test.erp5.testRestrictedPythonSecurity.py
...rtal_components/test.erp5.testRestrictedPythonSecurity.py
+143
-8
product/ERP5Type/Pandas.py
product/ERP5Type/Pandas.py
+84
-0
product/ERP5Type/patches/Restricted.py
product/ERP5Type/patches/Restricted.py
+6
-8
No files found.
bt5/erp5_core_test/TestTemplateItem/portal_components/test.erp5.testRestrictedPythonSecurity.py
View file @
1cbff971
...
@@ -25,6 +25,7 @@
...
@@ -25,6 +25,7 @@
#
#
##############################################################################
##############################################################################
import
json
import
os.path
import
os.path
import
tempfile
import
tempfile
import
textwrap
import
textwrap
...
@@ -572,14 +573,6 @@ class TestRestrictedPythonSecurity(ERP5TypeTestCase):
...
@@ -572,14 +573,6 @@ class TestRestrictedPythonSecurity(ERP5TypeTestCase):
)
)
def
testPandasIORead
(
self
):
def
testPandasIORead
(
self
):
self
.
assertRaises
(
Unauthorized
,
self
.
createAndRunScript
,
'''
import pandas as pd
pd.read_csv('testPandasIORead.csv')
'''
)
# Test the black_list configuration validity
# Test the black_list configuration validity
for
read_method
in
pandas_black_list
:
for
read_method
in
pandas_black_list
:
self
.
assertRaises
(
self
.
assertRaises
(
...
@@ -635,6 +628,148 @@ class TestRestrictedPythonSecurity(ERP5TypeTestCase):
...
@@ -635,6 +628,148 @@ class TestRestrictedPythonSecurity(ERP5TypeTestCase):
write_method('testPandasSeriesIOWrite.data')
write_method('testPandasSeriesIOWrite.data')
'''
.
format
(
write_method
=
write_method
))
'''
.
format
(
write_method
=
write_method
))
def _assertPandasRestrictedReadFunctionIsEqualTo(
    self, read_function, read_argument, expected_data_frame_init
):
    """
    Run a restricted script comparing a pandas read_* result with a DataFrame.

    The generated script builds ``pd.DataFrame(<expected_data_frame_init>)``
    and returns whether ``pd.<read_function>(<read_argument>)`` equals it.
    All three parameters are source-code fragments that are spliced verbatim
    into the script text.
    """
    script_template = '''
import pandas as pd
expected_data_frame = pd.DataFrame({expected_data_frame_init})
return pd.{read_function}({read_argument}).equals(expected_data_frame)
'''
    script_source = script_template.format(
        expected_data_frame_init=expected_data_frame_init,
        read_function=read_function,
        read_argument=read_argument,
    )
    # NOTE(review): createAndRunScript is defined outside this view;
    # presumably it checks that the script's return value equals `expected`.
    self.createAndRunScript(script_source, expected=True)
def testPandasRestrictedReadFunctionProhibitedInput(self):
    """
    Check that the patched pandas read_* functions refuse non-string input.

    Every restricted reader is called once with an integer and once with a
    StringIO object; each call must raise ZopeGuardsUnauthorized.
    """
    script_template = '''
import pandas as pd
{preparation}
pd.{pandas_read_function}({prohibited_input})
'''
    restricted_reader_name_tuple = ("read_json", "read_csv", "read_fwf")
    # Each case is (statement run before the call, expression passed to it).
    prohibited_case_tuple = (
        ('', 100),
        ('from StringIO import StringIO', 'StringIO("[1, 2, 3]")'),
    )
    for pandas_read_function in restricted_reader_name_tuple:
        for preparation, prohibited_input in prohibited_case_tuple:
            script_source = script_template.format(
                preparation=preparation,
                pandas_read_function=pandas_read_function,
                prohibited_input=prohibited_input,
            )
            self.assertRaises(
                ZopeGuardsUnauthorized,
                self.createAndRunScript,
                script_source,
            )
def testPandasReadFwf(self):
    """
    Check the restricted ``read_fwf`` against in-memory fixed-width strings.
    """
    # Each case is (argument source for read_fwf, DataFrame constructor
    # arguments describing the expected result); cases run in this order.
    case_tuple = (
        # Normal input should be correctly handled
        (r'"100\n200"', r"[[200]], columns=['100']"),
        # Ensure monkey patch passes keyword arguments to patched function
        (r'"1020\n3040", widths=[2, 2]', r"[[30, 40]], columns=['10', '20']"),
        # A string containing a URL or file path should be handled as if
        # it were a normal fixed-width string entry
        (
            r'"file://path/to/fwf/file.fwf"',
            r"[], columns=['file://path/to/fwf/file.fwf']",
        ),
    )
    for read_argument, expected_data_frame_init in case_tuple:
        self._assertPandasRestrictedReadFunctionIsEqualTo(
            "read_fwf",
            read_argument,
            expected_data_frame_init,
        )
def testPandasReadCSV(self):
    """
    Check the restricted ``read_csv`` against in-memory CSV strings.
    """
    # Each case is (argument source for read_csv, DataFrame constructor
    # arguments describing the expected result); cases run in this order.
    case_tuple = (
        # Normal input should be correctly handled
        (
            r'"11,2,300\n50.5,99,hello"',
            r"[[50.5, 99, 'hello']], columns='11 2 300'.split(' ')",
        ),
        # Ensure monkey patch passes keyword arguments to patched function
        (r'"a;b", sep=";"', r"[], columns=['a', 'b']"),
        # A string containing a URL or file path should be handled as if
        # it were a normal csv string entry
        (
            r'"https://people.sc.fsu.edu/~jburkardt/data/csv/addresses.csv"',
            r"[], columns=['https://people.sc.fsu.edu/~jburkardt/data/csv/addresses.csv']",
        ),
        (
            r'"file://path/to/csv/file.csv"',
            r"[], columns=['file://path/to/csv/file.csv']",
        ),
    )
    for read_argument, expected_data_frame_init in case_tuple:
        self._assertPandasRestrictedReadFunctionIsEqualTo(
            "read_csv",
            read_argument,
            expected_data_frame_init,
        )
def testPandasReadJsonParsesInput(self):
    """
    Check the restricted ``read_json`` against valid in-memory JSON strings.
    """
    # Each case is (argument source for read_json, DataFrame constructor
    # arguments describing the expected result); cases run in this order.
    case_tuple = (
        # Normal input should be correctly handled
        ('"[1, 2, 3]"', "[1, 2, 3]"),
        (
            '\'{"column_name": [1, 2, 3], "another_column": [3, 9.2, 100]}\'',
            '{"column_name": [1, 2, 3], "another_column": [3, 9.2, 100]}',
        ),
        # Ensure monkey patch passes keyword arguments to patched function
        (r'"[1, 2, 3]\n[4, 5, 6]", lines=True', "[[1, 2, 3], [4, 5, 6]]"),
    )
    for read_argument, expected_data_frame_init in case_tuple:
        self._assertPandasRestrictedReadFunctionIsEqualTo(
            "read_json",
            read_argument,
            expected_data_frame_init,
        )
    # URLs, etc. should raise a ValueError
    # (see testPandasReadJsonProhibitsMalicousString)
def testPandasReadJsonProhibitsMalicousString(self):
    """
    Check that file paths, URLs and other non-JSON strings passed to the
    restricted ``read_json`` raise ValueError.
    """
    # Create a valid json file which a non-patched read_json
    # function would be able to read.
    test_file_path = ".testPandasReadJson.json"
    json_test_data = [1, 2, 3]
    with open(test_file_path, 'w') as output_file:
        json.dump(json_test_data, output_file)
    self.addCleanup(os.remove, test_file_path)

    # Ensure json creation was successful
    self.assertTrue(os.path.isfile(test_file_path))
    with open(test_file_path, "r") as input_file:
        self.assertEqual(json_test_data, json.load(input_file))

    script_template = '''
import pandas as pd
pd.read_json("{}")
'''
    malicous_input_tuple = (
        # If pandas read this as a URL it would raise
        # a URLError. But because it tries to parse it
        # as a json string, it raises a ValueError.
        "https://test-url.com/test-name.json",
        "file://path/to/json/file.json",
        # This would not raise any error if the
        # pandas read function were not patched.
        test_file_path,
        # Gibberish should also raise a ValueError
        "Invalid-string",
    )
    for malicous_input in malicous_input_tuple:
        self.assertRaises(
            ValueError,
            self.createAndRunScript,
            script_template.format(malicous_input),
        )
def
test_suite
():
def
test_suite
():
suite
=
unittest
.
TestSuite
()
suite
=
unittest
.
TestSuite
()
...
...
product/ERP5Type/Pandas.py
0 → 100644
View file @
1cbff971
##############################################################################
#
# Copyright (c) 2012 Nexedi SARL and Contributors. All Rights Reserved.
# Levin Zimmermann <levin.zimmermann@nexedi.com>
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsability of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# garantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
"""
Restricted pandas module.
From restricted python, use "import pandas" (see patches/Restricted.py).
"""
from
pandas
import
*
# Add restricted versions of IO functions
import
six
as
_six
from
AccessControl.ZopeGuards
import
Unauthorized
as
_ZopeGuardsUnauthorized
if
_six
.
PY2
:
from
StringIO
import
StringIO
as
_StringIO
else
:
from
io
import
StringIO
as
_StringIO
def _addRestrictedPandasReadFunction(function_name):
    """
    Publish a string-only wrapper around ``pandas.<function_name>``.

    The wrapper is stored in this module's globals under ``function_name``,
    shadowing the reader brought in by ``from pandas import *``.

    The wrapper accepts only a first argument whose exact type is ``str``;
    anything else raises ``AccessControl.ZopeGuards.Unauthorized``. Accepted
    strings are wrapped in a StringIO buffer before being handed to the
    original pandas function, so the reader receives a buffer rather than
    the raw string. Remaining positional and keyword arguments are
    forwarded unchanged.
    """
    original_function = getattr(__import__('pandas'), function_name)

    def Pandas_read(data_string, *args, **kwargs):
        # Strict: don't use 'isinstance', only allow builtin str
        # objects (instances of str subclasses are rejected too).
        if type(data_string) is not str:
            raise _ZopeGuardsUnauthorized(
                "Parsing object '%s' of type '%s' is prohibited!"
                % (data_string, type(data_string))
            )
        return original_function(_StringIO(data_string), *args, **kwargs)

    disclaimer = (
        "\nDisclaimer:\n"
        "This function has been patched by ERP5 for zope sandbox usage.\n"
        "Only objects of type 'str' are valid inputs, file paths, files,\n"
        "urls, etc. are prohibited or ignored.\n"
    )
    # __doc__ is None when docstrings are stripped (e.g. python -OO);
    # fall back to an empty string so importing this module cannot fail
    # with a TypeError on the concatenation.
    Pandas_read.__doc__ = (original_function.__doc__ or "") + disclaimer
    globals()[function_name] = Pandas_read
def _addRestrictedPandasReadFunctionTuple():
    """
    Install the restricted wrapper for every supported pandas reader.

    Calls ``_addRestrictedPandasReadFunction`` once per name listed below,
    in order.
    """
    for reader_name in (
        "read_json",
        # "read_html",  # needs installation of additional dependency: html5lib
        "read_csv",
        "read_fwf",
        # "read_xml",  # only available for pandas version >= 1.3.0
    ):
        _addRestrictedPandasReadFunction(reader_name)
_addRestrictedPandasReadFunctionTuple
()
\ No newline at end of file
product/ERP5Type/patches/Restricted.py
View file @
1cbff971
...
@@ -371,6 +371,7 @@ MNAME_MAP = {
...
@@ -371,6 +371,7 @@ MNAME_MAP = {
'calendar'
:
'Products.ERP5Type.Calendar'
,
'calendar'
:
'Products.ERP5Type.Calendar'
,
'collections'
:
'Products.ERP5Type.Collections'
,
'collections'
:
'Products.ERP5Type.Collections'
,
'six'
:
'Products.ERP5Type.Six'
,
'six'
:
'Products.ERP5Type.Six'
,
'pandas'
:
'Products.ERP5Type.Pandas'
,
}
}
for
alias
,
real
in
six
.
iteritems
(
MNAME_MAP
):
for
alias
,
real
in
six
.
iteritems
(
MNAME_MAP
):
assert
'.'
not
in
alias
,
alias
# TODO: support this
assert
'.'
not
in
alias
,
alias
# TODO: support this
...
@@ -478,23 +479,20 @@ def restrictedMethod(s,name):
...
@@ -478,23 +479,20 @@ def restrictedMethod(s,name):
raise
Unauthorized
(
name
)
raise
Unauthorized
(
name
)
return
dummyMethod
return
dummyMethod
try
:
try
:
import
pandas
as
pd
import
pandas
as
pd
except
ImportError
:
except
ImportError
:
pass
pass
else
:
else
:
allow_module
(
'pandas'
)
allow_type
(
pd
.
Series
)
allow_type
(
pd
.
Timestamp
)
allow_type
(
pd
.
Timestamp
)
allow_type
(
pd
.
DatetimeIndex
)
allow_type
(
pd
.
DatetimeIndex
)
# XXX: pd.DataFrame has its own security thus disable
# until we can fully integrate it
#allow_type(pd.DataFrame)
allow_type
(
pd
.
MultiIndex
)
allow_type
(
pd
.
MultiIndex
)
allow_type
(
pd
.
indexes
.
range
.
RangeIndex
)
allow_type
(
pd
.
indexes
.
range
.
RangeIndex
)
allow_type
(
pd
.
indexes
.
numeric
.
Int64Index
)
allow_type
(
pd
.
indexes
.
numeric
.
Int64Index
)
allow_type
(
pd
.
core
.
groupby
.
DataFrameGroupBy
)
allow_type
(
pd
.
core
.
groupby
.
DataFrameGroupBy
)
allow_type
(
pd
.
core
.
groupby
.
SeriesGroupBy
)
allow_type
(
pd
.
core
.
groupby
.
SeriesGroupBy
)
allow_class
(
pd
.
DataFrame
)
allow_class
(
pd
.
DataFrame
)
# Note: These black_list methods are for pandas 0.19.2
# Note: These black_list methods are for pandas 0.19.2
...
@@ -503,10 +501,10 @@ else:
...
@@ -503,10 +501,10 @@ else:
ContainerAssertions
[
pd
.
Series
]
=
_check_access_wrapper
(
ContainerAssertions
[
pd
.
Series
]
=
_check_access_wrapper
(
pd
.
Series
,
dict
.
fromkeys
(
series_black_list
,
restrictedMethod
))
pd
.
Series
,
dict
.
fromkeys
(
series_black_list
,
restrictedMethod
))
pandas_black_list
=
(
'read_
csv'
,
'read_json'
,
'read_
pickle'
,
'read_hdf'
,
pandas_black_list
=
(
'read_pickle'
,
'read_hdf'
,
'read_
fwf'
,
'read_
excel'
,
'read_html'
,
'read_msgpack'
,
'read_excel'
,
'read_html'
,
'read_msgpack'
,
'read_gbq'
,
'read_sas'
,
'read_stata'
)
'read_gbq'
,
'read_sas'
,
'read_stata'
)
ModuleSecurityInfo
(
'pandas'
).
declarePrivate
(
*
pandas_black_list
)
ModuleSecurityInfo
(
MNAME_MAP
[
'pandas'
]
).
declarePrivate
(
*
pandas_black_list
)
dataframe_black_list
=
(
'to_csv'
,
'to_json'
,
'to_pickle'
,
'to_hdf'
,
dataframe_black_list
=
(
'to_csv'
,
'to_json'
,
'to_pickle'
,
'to_hdf'
,
'to_excel'
,
'to_html'
,
'to_sql'
,
'to_msgpack'
,
'to_excel'
,
'to_html'
,
'to_sql'
,
'to_msgpack'
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment