Commit 5ec409e2 authored by Ivan Tyagov

Roque data lake

See merge request nexedi/wendelin!66
parents 04979895 f0dd7676
...@@ -17,7 +17,7 @@ except Exception as e: # fails because unauthorized access ...@@ -17,7 +17,7 @@ except Exception as e: # fails because unauthorized access
data_stream_dict = {} data_stream_dict = {}
for stream in data_set.DataSet_getDataStreamList(): for stream in data_set.DataSet_getDataStreamList():
if not portal.ERP5Site_checkReferenceInvalidated(stream) and stream.getValidationState() != "draft": if stream and not portal.ERP5Site_checkReferenceInvalidated(stream) and stream.getValidationState() != "draft":
data_stream_info_dict = { 'id': 'data_stream_module/'+stream.getId(), data_stream_info_dict = { 'id': 'data_stream_module/'+stream.getId(),
'size': stream.getSize(), 'size': stream.getSize(),
'hash': stream.getVersion() } 'hash': stream.getVersion() }
......
"""This script invalidate all data sets (and corresponding ingestion objects) older than wendelin.io release (<= 2019)"""
portal = context.getPortalObject()
portal_catalog = portal.portal_catalog
from DateTime import DateTime
old_date = DateTime(2019, 12, 31)
catalog_kw = {'modification_date': {'query': old_date, 'range': '<='}}
print "Following Dataset were invalidated:"
for data_set in portal_catalog(portal_type="Data Set", **catalog_kw):
print
print "DATASET: " + data_set.getReference()
print "state: " + data_set.getValidationState()
print "date: " + str(data_set.getModificationDate())
print "len of datastream list: " + str(len(data_set.DataSet_getDataStreamList()))
for data_stream in data_set.DataSet_getDataStreamList():
if data_stream is not None:
portal.ERP5Site_invalidateIngestionObjects(data_stream.getReference())
try:
data_stream.invalidate()
except:
pass # fails if it's already invalidated, draft or if it doens't allow invalidation (e.g. DI)
portal.ERP5Site_invalidateReference(data_set)
try:
data_set.invalidate()
except:
pass # fails if it's already invalidated, draft or if it doens't allow invalidation (e.g. DI)
return printed
<?xml version="1.0"?>
<!-- Zope export metadata for the Python Script whose id is
     "ERP5Site_invalidateOldDatasets". It records the object's class
     (Products.PythonScripts.PythonScript), its name bindings and its
     parameter string. -->
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="PythonScript" module="Products.PythonScripts.PythonScript"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>Script_magic</string> </key>
<value> <int>3</int> </value>
</item>
<item>
<key> <string>_bind_names</string> </key>
<value>
<object>
<klass>
<global name="NameAssignments" module="Shared.DC.Scripts.Bindings"/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key> <string>_asgns</string> </key>
<value>
<!-- Maps each binding slot to the variable name used inside the script
     body: container, context, script and traverse_subpath. -->
<dictionary>
<item>
<key> <string>name_container</string> </key>
<value> <string>container</string> </value>
</item>
<item>
<key> <string>name_context</string> </key>
<value> <string>context</string> </value>
</item>
<item>
<key> <string>name_m_self</string> </key>
<value> <string>script</string> </value>
</item>
<item>
<key> <string>name_subpath</string> </key>
<value> <string>traverse_subpath</string> </value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<!-- Empty parameter string: the script is called without arguments. -->
<item>
<key> <string>_params</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>ERP5Site_invalidateOldDatasets</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
...@@ -3,4 +3,5 @@ ...@@ -3,4 +3,5 @@
""" """
data_set = state_change['object'] data_set = state_change['object']
for data_stream in data_set.DataSet_getDataStreamList(): for data_stream in data_set.DataSet_getDataStreamList():
data_stream.activate().publish() if data_stream and not context.getPortalObject().ERP5Site_checkReferenceInvalidated(data_stream) and data_stream.getValidationState() != 'draft':
data_stream.activate().publish()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment