Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
W
wendelin
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Roque
wendelin
Commits
4bd0aa64
Commit
4bd0aa64
authored
Jun 10, 2020
by
Roque
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
erp5_wendelin_data_lake_ingestion: update unit tests
parent
edc668b5
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
48 additions
and
41 deletions
+48
-41
bt5/erp5_wendelin_data_lake_ingestion/TestTemplateItem/portal_components/test.erp5.testDataLakeIngestion.py
...Item/portal_components/test.erp5.testDataLakeIngestion.py
+46
-39
bt5/erp5_wendelin_data_lake_ingestion/TestTemplateItem/portal_components/test.erp5.testDataLakeIngestion.xml
...tem/portal_components/test.erp5.testDataLakeIngestion.xml
+2
-2
No files found.
bt5/erp5_wendelin_data_lake_ingestion/TestTemplateItem/portal_components/test.erp5.testDataLakeIngestion.py
View file @
4bd0aa64
...
@@ -70,6 +70,12 @@ class TestDataIngestion(SecurityTestCase):
...
@@ -70,6 +70,12 @@ class TestDataIngestion(SecurityTestCase):
reference
=
reference
)
reference
=
reference
)
return
data_stream
return
data_stream
def
getDataStreamChunkList
(
self
,
reference
):
data_stream_list
=
self
.
portal
.
portal_catalog
(
portal_type
=
'Data Stream'
,
reference
=
reference
)
return
data_stream_list
def
ingestRequest
(
self
,
reference
,
eof
,
data_chunk
,
ingestion_policy
):
def
ingestRequest
(
self
,
reference
,
eof
,
data_chunk
,
ingestion_policy
):
encoded_data_chunk
=
base64
.
b64encode
(
data_chunk
)
encoded_data_chunk
=
base64
.
b64encode
(
data_chunk
)
request
=
self
.
portal
.
REQUEST
request
=
self
.
portal
.
REQUEST
...
@@ -84,13 +90,10 @@ class TestDataIngestion(SecurityTestCase):
...
@@ -84,13 +90,10 @@ class TestDataIngestion(SecurityTestCase):
def
ingest
(
self
,
data_chunk
,
reference
,
extension
,
eof
,
randomize_ingestion_reference
=
False
):
def
ingest
(
self
,
data_chunk
,
reference
,
extension
,
eof
,
randomize_ingestion_reference
=
False
):
ingestion_reference
=
self
.
getIngestionReference
(
reference
,
extension
,
randomize_ingestion_reference
)
ingestion_reference
=
self
.
getIngestionReference
(
reference
,
extension
,
randomize_ingestion_reference
)
# use default ebulk policy
# use default ebulk policy
ingestion_policy
=
self
.
portal
.
portal_ingestion_policies
.
wendelin_embulk
ingestion_policy
=
self
.
portal
.
portal_ingestion_policies
.
default_ebulk
self
.
ingestRequest
(
ingestion_reference
,
eof
,
data_chunk
,
ingestion_policy
)
self
.
ingestRequest
(
ingestion_reference
,
eof
,
data_chunk
,
ingestion_policy
)
_
,
ingestion_reference
=
self
.
sanitizeReference
(
ingestion_reference
)
_
,
ingestion_reference
=
self
.
sanitizeReference
(
ingestion_reference
)
return
ingestion_reference
return
ingestion_reference
def
stepIngest
(
self
,
extension
,
delimiter
,
randomize_ingestion_reference
=
False
):
def
stepIngest
(
self
,
extension
,
delimiter
,
randomize_ingestion_reference
=
False
):
...
@@ -108,7 +111,6 @@ class TestDataIngestion(SecurityTestCase):
...
@@ -108,7 +111,6 @@ class TestDataIngestion(SecurityTestCase):
chunk
.
append
(
line
)
chunk
.
append
(
line
)
else
:
else
:
break
break
ingestion_reference
=
self
.
ingest
(
data_chunk
,
reference
,
extension
,
self
.
SINGLE_INGESTION_END
,
randomize_ingestion_reference
=
randomize_ingestion_reference
)
ingestion_reference
=
self
.
ingest
(
data_chunk
,
reference
,
extension
,
self
.
SINGLE_INGESTION_END
,
randomize_ingestion_reference
=
randomize_ingestion_reference
)
if
os
.
path
.
exists
(
file_name
):
if
os
.
path
.
exists
(
file_name
):
...
@@ -127,8 +129,9 @@ class TestDataIngestion(SecurityTestCase):
...
@@ -127,8 +129,9 @@ class TestDataIngestion(SecurityTestCase):
data_stream_data
=
data_stream
.
getData
()
data_stream_data
=
data_stream
.
getData
()
self
.
assertEqual
(
data_chunk
,
data_stream_data
)
self
.
assertEqual
(
data_chunk
,
data_stream_data
)
# check Data Stream and Data Set are validated
# check Data Set is validated and Data Stream is published
self
.
assertEqual
(
'validated'
,
data_stream
.
getValidationState
())
self
.
assertEqual
(
'validated'
,
data_set
.
getValidationState
())
self
.
assertEqual
(
'published'
,
data_stream
.
getValidationState
())
return
data_set
,
[
data_stream
]
return
data_set
,
[
data_stream
]
...
@@ -140,7 +143,7 @@ class TestDataIngestion(SecurityTestCase):
...
@@ -140,7 +143,7 @@ class TestDataIngestion(SecurityTestCase):
def
test_02_DefaultSplitIngestion
(
self
):
def
test_02_DefaultSplitIngestion
(
self
):
"""
"""
Test that multiple uploads from ebulk end up concatenated in the same Data Stream
Test that multiple uploads from ebulk end up in multiple Data Streams
(in case of large file upload when ebulk by default splits the file into 50MB
(in case of large file upload when ebulk by default splits the file into 50MB
chunks).
chunks).
"""
"""
...
@@ -152,7 +155,6 @@ class TestDataIngestion(SecurityTestCase):
...
@@ -152,7 +155,6 @@ class TestDataIngestion(SecurityTestCase):
for
_
in
xrange
(
250
)])
for
_
in
xrange
(
250
)])
data_chunk_4
=
''
.
join
([
random
.
choice
(
string
.
ascii_letters
+
string
.
digits
)
\
data_chunk_4
=
''
.
join
([
random
.
choice
(
string
.
ascii_letters
+
string
.
digits
)
\
for
_
in
xrange
(
250
)])
for
_
in
xrange
(
250
)])
data_chunk
=
data_chunk_1
+
data_chunk_2
+
data_chunk_3
+
data_chunk_4
reference
=
self
.
getRandomReference
()
reference
=
self
.
getRandomReference
()
...
@@ -172,13 +174,20 @@ class TestDataIngestion(SecurityTestCase):
...
@@ -172,13 +174,20 @@ class TestDataIngestion(SecurityTestCase):
time
.
sleep
(
1
)
time
.
sleep
(
1
)
self
.
tic
()
self
.
tic
()
# explicitly call the alarm so all 4 Data Streams can be concatenated into one
# explicitly call the alarm so all 4 Data Streams are validated and published
self
.
portal
.
portal_alarms
.
wendelin_
data_lake_handle_analysis
.
Alarm_dataLakeH
andleAnalysis
()
self
.
portal
.
portal_alarms
.
wendelin_
handle_analysis
.
Alarm_h
andleAnalysis
()
self
.
tic
()
self
.
tic
()
# check resulting Data Stream
# check resulting Data Streams
data_stream
=
self
.
getDataStream
(
ingestion_reference
)
data_stream_list
=
self
.
getDataStreamChunkList
(
ingestion_reference
)
self
.
assertEqual
(
data_chunk
,
data_stream
.
getData
())
# one Data Stream per chunk
self
.
assertEqual
(
len
(
data_stream_list
),
4
)
# the last Data Stream (EOF) is published, the rest are validated
for
stream
in
data_stream_list
:
if
stream
.
getId
().
endswith
(
self
.
EOF
.
replace
(
self
.
REFERENCE_SEPARATOR
,
""
)):
self
.
assertEqual
(
'published'
,
stream
.
getValidationState
())
else
:
self
.
assertEqual
(
'validated'
,
stream
.
getValidationState
())
def
test_03_DefaultWendelinConfigurationExistency
(
self
):
def
test_03_DefaultWendelinConfigurationExistency
(
self
):
"""
"""
...
@@ -186,7 +195,7 @@ class TestDataIngestion(SecurityTestCase):
...
@@ -186,7 +195,7 @@ class TestDataIngestion(SecurityTestCase):
"""
"""
# test default ebulk ingestion exists
# test default ebulk ingestion exists
self
.
assertNotEqual
(
None
,
self
.
assertNotEqual
(
None
,
getattr
(
self
.
portal
.
portal_ingestion_policies
,
"
wendelin_em
bulk"
,
None
))
getattr
(
self
.
portal
.
portal_ingestion_policies
,
"
default_e
bulk"
,
None
))
self
.
assertNotEqual
(
None
,
self
.
assertNotEqual
(
None
,
getattr
(
self
.
portal
.
data_supply_module
,
"embulk"
,
None
))
getattr
(
self
.
portal
.
data_supply_module
,
"embulk"
,
None
))
...
@@ -200,10 +209,8 @@ class TestDataIngestion(SecurityTestCase):
...
@@ -200,10 +209,8 @@ class TestDataIngestion(SecurityTestCase):
# check data relation between Data Set and Data Streams work
# check data relation between Data Set and Data Streams work
self
.
assertSameSet
(
data_stream_list
,
data_set
.
DataSet_getDataStreamList
())
self
.
assertSameSet
(
data_stream_list
,
data_set
.
DataSet_getDataStreamList
())
# publish data set and have all Data Streams published automatically
# check data set and all Data Streams states
data_set
.
publish
()
self
.
assertEqual
(
'validated'
,
data_set
.
getValidationState
())
self
.
tic
()
self
.
assertEqual
(
'published'
,
data_set
.
getValidationState
())
self
.
assertSameSet
([
'published'
for
x
in
data_stream_list
],
self
.
assertSameSet
([
'published'
for
x
in
data_stream_list
],
[
x
.
getValidationState
()
for
x
in
data_stream_list
])
[
x
.
getValidationState
()
for
x
in
data_stream_list
])
...
...
bt5/erp5_wendelin_data_lake_ingestion/TestTemplateItem/portal_components/test.erp5.testDataLakeIngestion.xml
View file @
4bd0aa64
...
@@ -46,8 +46,8 @@
...
@@ -46,8 +46,8 @@
<key>
<string>
text_content_warning_message
</string>
</key>
<key>
<string>
text_content_warning_message
</string>
</key>
<value>
<value>
<tuple>
<tuple>
<string>
W:
99
, 34: Unused variable \'i\' (unused-variable)
</string>
<string>
W:
102
, 34: Unused variable \'i\' (unused-variable)
</string>
<string>
W:
99
, 76: Unused variable \'j\' (unused-variable)
</string>
<string>
W:
102
, 76: Unused variable \'j\' (unused-variable)
</string>
</tuple>
</tuple>
</value>
</value>
</item>
</item>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment