Commit 1c85b4a5 authored by Ivan Tyagov's avatar Ivan Tyagov Committed by Roque

erp5_wendelin_data_lake: add a utility script to check md5sum of all Data...

erp5_wendelin_data_lake: add a utility script to check the md5sum of all Data Streams for a Data Set (uploaded from the file system) against what we have in reality.
parent f814135b
"""
Script to check that a filesystem md5sum of a folder (uploaded to file_system_checksum File)
is properly uploaded to Wendelin Data Lake.
The format is the same as md5sum's output, one file per line:
<md5_sum> <filename.extension>
"""
data = str(context.file_system_checksum).strip()
lines = data.split("\n")
print "Total files = ", len(lines)
for line in lines[:]:
md5_checksum = line[:32].strip()
full_filename = line[32:].strip()
# check Data stream for this hash exists
filename, extension = full_filename.split(".")
reference = "%s/%s/%s" %(data_set_reference, filename, extension)
catalog_kw = {"portal_type": "Data Stream",
"reference": reference}
data_stream = context.portal_catalog.getResultValue(**catalog_kw)
if data_stream is None:
print "[NOT FOUND]", reference
else:
is_upload_ok = (data_stream.getVersion()==md5_checksum)
print md5_checksum, filename, data_stream is not None, is_upload_ok
return printed
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="PythonScript" module="Products.PythonScripts.PythonScript"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>Script_magic</string> </key>
<value> <int>3</int> </value>
</item>
<item>
<key> <string>_bind_names</string> </key>
<value>
<object>
<klass>
<global name="NameAssignments" module="Shared.DC.Scripts.Bindings"/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key> <string>_asgns</string> </key>
<value>
<dictionary>
<item>
<key> <string>name_container</string> </key>
<value> <string>container</string> </value>
</item>
<item>
<key> <string>name_context</string> </key>
<value> <string>context</string> </value>
</item>
<item>
<key> <string>name_m_self</string> </key>
<value> <string>script</string> </value>
</item>
<item>
<key> <string>name_subpath</string> </key>
<value> <string>traverse_subpath</string> </value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key> <string>_params</string> </key>
<value> <string>data_set_reference=None</string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>DataSet_checkMd5DataStreamList</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment