Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
K
klaus_wendelin
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Eteri
klaus_wendelin
Commits
d423a31e
Commit
d423a31e
authored
Mar 03, 2021
by
Roque
Browse files
Options
Browse Files
Download
Plain Diff
Fix tests
See merge request
nexedi/wendelin!86
parents
43f6f999
dd988c3b
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
19 additions
and
51 deletions
+19
-51
bt5/erp5_wendelin/SkinTemplateItem/portal_skins/erp5_wendelin/DataSet_getDataStreamRowList.py
...ortal_skins/erp5_wendelin/DataSet_getDataStreamRowList.py
+1
-1
bt5/erp5_wendelin/SkinTemplateItem/portal_skins/erp5_wendelin/ERP5Site_getDataStreamList.py
.../portal_skins/erp5_wendelin/ERP5Site_getDataStreamList.py
+0
-0
bt5/erp5_wendelin/SkinTemplateItem/portal_skins/erp5_wendelin/ERP5Site_getDataStreamList.xml
...portal_skins/erp5_wendelin/ERP5Site_getDataStreamList.xml
+1
-1
bt5/erp5_wendelin_data_lake_ingestion/SkinTemplateItem/portal_skins/erp5_wendelin_data_lake/DataSet_getDataStreamList.py
...kins/erp5_wendelin_data_lake/DataSet_getDataStreamList.py
+13
-0
bt5/erp5_wendelin_data_lake_ingestion/SkinTemplateItem/portal_skins/erp5_wendelin_data_lake/DataSet_getDataStreamList.xml
...ins/erp5_wendelin_data_lake/DataSet_getDataStreamList.xml
+2
-2
bt5/erp5_wendelin_data_lake_ingestion/SkinTemplateItem/portal_skins/erp5_wendelin_data_lake/ERP5Site_checkIngestedData.py
...ins/erp5_wendelin_data_lake/ERP5Site_checkIngestedData.py
+0
-45
bt5/erp5_wendelin_data_lake_ingestion/SkinTemplateItem/portal_skins/erp5_wendelin_data_lake/ERP5Site_getDataStreamCount.py
...ns/erp5_wendelin_data_lake/ERP5Site_getDataStreamCount.py
+1
-1
bt5/erp5_wendelin_data_lake_ingestion/SkinTemplateItem/portal_skins/erp5_wendelin_data_lake/ERP5Site_getDataStreamList.py
...ins/erp5_wendelin_data_lake/ERP5Site_getDataStreamList.py
+1
-1
No files found.
bt5/erp5_wendelin/SkinTemplateItem/portal_skins/erp5_wendelin/DataSet_getDataStreamRowList.py
View file @
d423a31e
...
@@ -2,4 +2,4 @@
...
@@ -2,4 +2,4 @@
Get list of Data Streams for context Data set.
Get list of Data Streams for context Data set.
"""
"""
data_set_uid
=
context
.
getUid
()
data_set_uid
=
context
.
getUid
()
return
context
.
DataSet
_getDataStreamList
(
data_set_uid
,
limit
)
return
context
.
ERP5Site
_getDataStreamList
(
data_set_uid
,
limit
)
bt5/erp5_wendelin/SkinTemplateItem/portal_skins/erp5_wendelin/
DataSet
_getDataStreamList.py
→
bt5/erp5_wendelin/SkinTemplateItem/portal_skins/erp5_wendelin/
ERP5Site
_getDataStreamList.py
View file @
d423a31e
File moved
bt5/erp5_wendelin/SkinTemplateItem/portal_skins/erp5_wendelin/
DataSet
_getDataStreamList.xml
→
bt5/erp5_wendelin/SkinTemplateItem/portal_skins/erp5_wendelin/
ERP5Site
_getDataStreamList.xml
View file @
d423a31e
...
@@ -54,7 +54,7 @@
...
@@ -54,7 +54,7 @@
</item>
</item>
<item>
<item>
<key>
<string>
id
</string>
</key>
<key>
<string>
id
</string>
</key>
<value>
<string>
DataSet
_getDataStreamList
</string>
</value>
<value>
<string>
ERP5Site
_getDataStreamList
</string>
</value>
</item>
</item>
<item>
<item>
<key>
<string>
title
</string>
</key>
<key>
<string>
title
</string>
</key>
...
...
bt5/erp5_wendelin_data_lake_ingestion/SkinTemplateItem/portal_skins/erp5_wendelin_data_lake/DataSet_getDataStreamList.py
0 → 100644
View file @
d423a31e
"""
Return list of Data Streams belonging to a Data Set.
Data Ingestion line aggregates both Data Set and Data Stream.
Note: This code is quite computationally costly (for Data Streams having thousands of files) as it needs to:
1. Query MariaDB to find ingestion lines
2. Read from ZODB both Data Ingestion Lines and Data Streams (which itself can be big too)
"""
data_ingestion_line_list
=
context
.
portal_catalog
(
portal_type
=
"Data Ingestion Line"
,
aggregate_uid
=
context
.
getUid
())
return
[
x
.
getAggregateValue
(
portal_type
=
"Data Stream"
)
\
for
x
in
data_ingestion_line_list
]
bt5/erp5_wendelin_data_lake_ingestion/SkinTemplateItem/portal_skins/erp5_wendelin_data_lake/
ERP5Site_checkIngestedData
.xml
→
bt5/erp5_wendelin_data_lake_ingestion/SkinTemplateItem/portal_skins/erp5_wendelin_data_lake/
DataSet_getDataStreamList
.xml
View file @
d423a31e
...
@@ -50,11 +50,11 @@
...
@@ -50,11 +50,11 @@
</item>
</item>
<item>
<item>
<key>
<string>
_params
</string>
</key>
<key>
<string>
_params
</string>
</key>
<value>
<string>
data_set_reference=None
</string>
</value>
<value>
<string></string>
</value>
</item>
</item>
<item>
<item>
<key>
<string>
id
</string>
</key>
<key>
<string>
id
</string>
</key>
<value>
<string>
ERP5Site_checkIngestedData
</string>
</value>
<value>
<string>
DataSet_getDataStreamList
</string>
</value>
</item>
</item>
</dictionary>
</dictionary>
</pickle>
</pickle>
...
...
bt5/erp5_wendelin_data_lake_ingestion/SkinTemplateItem/portal_skins/erp5_wendelin_data_lake/ERP5Site_checkIngestedData.py
deleted
100644 → 0
View file @
43f6f999
import
json
portal
=
context
.
getPortalObject
()
portal_catalog
=
portal
.
portal_catalog
def
getDatasetInfo
(
data_set
):
size
=
0
datastream_result_dict
=
json
.
loads
(
portal
.
ERP5Site_getDataStreamList
(
data_set
.
getReference
()))
for
stream_dict
in
datastream_result_dict
[
'result'
]:
size
+=
stream_dict
[
'full-size'
]
return
len
(
datastream_result_dict
[
'result'
]),
size
def
format_size
(
num
,
suffix
=
'b'
):
for
unit
in
[
''
,
'K'
,
'M'
,
'G'
,
'T'
,
'P'
,
'E'
,
'Z'
]:
if
abs
(
num
)
<
1024.0
:
return
"%3.1f %s%s"
%
(
num
,
unit
,
suffix
)
num
/=
1024.0
return
"%.1f %s%s"
%
(
num
,
'Yi'
,
suffix
)
data_set_list
=
[]
if
data_set_reference
:
try
:
data_set
=
portal
.
data_set_module
.
get
(
data_set_reference
)
if
data_set
is
None
or
portal
.
ERP5Site_checkReferenceInvalidated
(
data_set
):
return
"Not found: there is no valid dataset for that reference"
data_set_list
.
append
(
data_set
)
except
Exception
as
e
:
# fails because unauthorized access
return
"ERROR: "
+
str
(
e
)
else
:
data_set_list
=
portal_catalog
(
portal_type
=
"Data Set"
,
validation_state
=
'validated OR published'
)
total_size
=
0
for
data_set
in
data_set_list
:
print
"Data set "
+
data_set
.
getReference
()
nfiles
,
size
=
getDatasetInfo
(
data_set
)
total_size
+=
size
print
" #files: "
+
str
(
nfiles
)
print
" Size: "
+
format_size
(
size
)
print
if
len
(
data_set_list
)
>
1
:
print
print
"TOTAL SIZE: "
+
format_size
(
total_size
)
return
printed
bt5/erp5_wendelin_data_lake_ingestion/SkinTemplateItem/portal_skins/erp5_wendelin_data_lake/ERP5Site_getDataStreamCount.py
View file @
d423a31e
...
@@ -14,6 +14,6 @@ except Exception as e:
...
@@ -14,6 +14,6 @@ except Exception as e:
return
{
"status_code"
:
1
,
"error_message"
:
"401 - Unauthorized access. Please check your user credentials and try again."
}
return
{
"status_code"
:
1
,
"error_message"
:
"401 - Unauthorized access. Please check your user credentials and try again."
}
data_set_uid
=
data_set
.
getUid
()
data_set_uid
=
data_set
.
getUid
()
data_stream_list
=
context
.
DataSet
_getDataStreamList
(
data_set_uid
)
data_stream_list
=
context
.
ERP5Site
_getDataStreamList
(
data_set_uid
)
return
{
"status_code"
:
0
,
"result"
:
len
(
data_stream_list
)
}
return
{
"status_code"
:
0
,
"result"
:
len
(
data_stream_list
)
}
bt5/erp5_wendelin_data_lake_ingestion/SkinTemplateItem/portal_skins/erp5_wendelin_data_lake/ERP5Site_getDataStreamList.py
View file @
d423a31e
...
@@ -19,7 +19,7 @@ except Exception as e: # fails because unauthorized access
...
@@ -19,7 +19,7 @@ except Exception as e: # fails because unauthorized access
return
json
.
dumps
({
"status_code"
:
1
,
"error_message"
:
"401 - Unauthorized access. Please check your user credentials and try again."
})
return
json
.
dumps
({
"status_code"
:
1
,
"error_message"
:
"401 - Unauthorized access. Please check your user credentials and try again."
})
data_set_uid
=
data_set
.
getUid
()
data_set_uid
=
data_set
.
getUid
()
data_stream_list
=
context
.
DataSet
_getDataStreamList
(
data_set_uid
,
limit
)
data_stream_list
=
context
.
ERP5Site
_getDataStreamList
(
data_set_uid
,
limit
)
data_stream_dict
=
{}
data_stream_dict
=
{}
for
stream_brain
in
data_stream_list
:
for
stream_brain
in
data_stream_list
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment