Commit 2d01c3f5 authored by Roque's avatar Roque

erp5_wendelin_data_lake_ingestion: new getDataStreamList script data structure return

- do not return invalid data streams (filter by reference and state)
- groups split file datastreams by reference
- returns full file size and large hash
parent d0138cb6
"""
This script is called from ebulk client to get list of Data Streams for a
Data set.
This script is called from ebulk client to get list of Data Streams for a Data set.
"""
import re
import json
from Products.ERP5Type.Log import log
from Products.ZSQLCatalog.SQLCatalog import Query, SimpleQuery, ComplexQuery
portal = context.getPortalObject()
portal_catalog = portal.portal_catalog
reference_separator = portal.ERP5Site_getIngestionReferenceDictionary()["reference_separator"]
try:
data_set = portal.data_set_module.get(data_set_reference)
if data_set is None or portal.ERP5Site_checkReferenceInvalidated(data_set):
return { "status_code": 0, "result": [] }
except Exception as e: # fails because unauthorized access
log("Unauthorized access to getDataStreamList.")
log("Unauthorized access to getDataStreamList: " + str(e))
return { "status_code": 1, "error_message": "401 - Unauthorized access. Please check your user credentials and try again." }
data_set = portal.data_set_module.get(data_set_reference)
if data_set is None:
return []
data_stream_list = []
data_stream_dict = {}
for stream in data_set.DataSet_getDataStreamList():
if stream.getVersion() == "":
return { "status_code": 2, "result": [] }
data_stream_list.append({ 'id': 'data_stream_module/'+stream.getId(),
'reference': stream.getReference(),
'size': stream.getSize(),
'hash': stream.getVersion() })
if not portal.ERP5Site_checkReferenceInvalidated(stream) and stream.getValidationState() != "draft":
data_stream_info_dict = { 'id': 'data_stream_module/'+stream.getId(),
'size': stream.getSize(),
'hash': stream.getVersion() }
if stream.getReference() in data_stream_dict:
data_stream_dict[stream.getReference()]['data-stream-list'].append(data_stream_info_dict)
data_stream_dict[stream.getReference()]['large-hash'] = data_stream_dict[stream.getReference()]['large-hash'] + str(stream.getVersion())
data_stream_dict[stream.getReference()]['full-size'] = int(data_stream_dict[stream.getReference()]['full-size']) + int(stream.getSize())
else:
data_stream_dict[stream.getReference()] = { 'data-stream-list': [data_stream_info_dict],
'id': 'data_stream_module/'+stream.getId(),
'reference': stream.getReference(),
'large-hash': stream.getVersion(),
'full-size': stream.getSize() }
dict = { 'status_code': 0, 'result': data_stream_list }
return json.dumps(dict)
result_dict = { 'status_code': 0, 'result': data_stream_dict.values()}
return json.dumps(result_dict)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment