Commit a6a9bbf7 authored by Roque Porchetto

erp5_wendelin_telecom_ingestion: new unit tests for recent features

parent 00a6dd7c
......@@ -4,8 +4,11 @@ import os
import json
import numpy as np
from lxml.html import parse
import hashlib
from Products.ERP5Type.Log import log
CHUNK_SIZE = 200000
def getMNEReportJSON(file_name):
try:
pattern = file_name + "_raw.fif"
......@@ -31,6 +34,18 @@ def getMNEReportJSON(file_name):
if os.path.exists(report_file):
os.remove(report_file)
def generateSizeHash(content):
  """Return the size and the MD5 hex digest of ``content``.

  The values are computed directly from the in-memory byte string. No
  scratch file is written, so concurrent calls cannot clobber each other's
  data and nothing is left on disk if an error occurs part-way.

  content -- byte string whose size and hash are wanted.

  Returns a ``(size, hash_value)`` tuple where ``size`` is the number of
  bytes in ``content`` and ``hash_value`` is its MD5 digest as a lowercase
  hexadecimal string.
  """
  size = len(content)
  hash_value = hashlib.md5(content).hexdigest()
  return size, hash_value
def generateRawData(sample_data_stream):
log("-TEST- Getting raw content from sample data stream...")
content = sample_data_stream.getData()
......
......@@ -52,7 +52,7 @@
<key> <string>text_content_warning_message</string> </key>
<value>
<tuple>
<string>W: 50, 8: Unused variable \'times\' (unused-variable)</string>
<string>W: 65, 8: Unused variable \'times\' (unused-variable)</string>
</tuple>
</value>
</item>
......
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="ExternalMethod" module="Products.ExternalMethod.ExternalMethod"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_function</string> </key>
<value> <string>generateSizeHash</string> </value>
</item>
<item>
<key> <string>_module</string> </key>
<value> <string>unit_test_external</string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>generateSizeHash</string> </value>
</item>
<item>
<key> <string>title</string> </key>
<value> <string></string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
......@@ -56,6 +56,7 @@ try:
if data_ingestion is None:
return FALSE
# TODO: fix this (contemplate scenarios of partial ingestion overwrites)
if size != "" and size != None:
# this is a modified file
return FALSE
......
......@@ -9,8 +9,9 @@ from datetime import datetime, timedelta
import numpy as np
import math
import base64
from Products.ZSQLCatalog.SQLCatalog import Query
from Products.ZSQLCatalog.SQLCatalog import Query, ComplexQuery
from Products.ERP5Type.Log import log
import hashlib
class TestDataIngestion(SecurityTestCase):
......@@ -30,9 +31,14 @@ class TestDataIngestion(SecurityTestCase):
CHUNK_SIZE_TXT = 50000
CHUNK_SIZE_CSV = 25
REF_PREFIX = "fake-supplier/fake-dataset/"
REF_SUPPLIER_PREFIX = "fake-supplier/"
INGESTION_SCRIPT = 'HandleFifEmbulkIngestion'
USER = 'zope'
PASS = 'roque5'
INVALID = "_invalid"
NEW = "_NEW"
FALSE = "FALSE"
TRUE = "TRUE"
def getTitle(self):
  """Return the title string of this test case."""
  title = "DataIngestionTest"
  return title
......@@ -57,6 +63,9 @@ class TestDataIngestion(SecurityTestCase):
ingestion_id = data_stream.getId()
return ingestion_id, ingestion_reference
def getFullReference(self, ingestion_reference, size, hash_value):
  """Build the full reference string for ``ingestion_reference``.

  The result is the supplier prefix followed by the ingestion reference,
  then "//", an empty size field, "/" and an empty hash field.
  """
  # NOTE(review): ``size`` and ``hash_value`` are accepted but never used;
  # both trailing fields are always empty strings. Confirm this is intended.
  size_field = str("")
  hash_field = ""
  parts = [self.REF_SUPPLIER_PREFIX, ingestion_reference, "//", size_field, "/", hash_field]
  return "".join(parts)
def chunks(self, l, n):
  """Yield successive slices of ``l``, each holding at most ``n`` items."""
  for start in xrange(0, len(l), n):
    stop = start + n
    piece = l[start:stop]
    yield piece
......@@ -68,6 +77,10 @@ class TestDataIngestion(SecurityTestCase):
return raw_data, array, json_data
def getIngestionPolicy(self, reference, ingestion_script):
ingestion_policy = self.portal.portal_catalog.getResultValue(
portal_type = 'Ingestion Policy',
reference = reference)
if ingestion_policy != None: return ingestion_policy
ingestion_policy = self.portal.portal_ingestion_policies.newContent( \
id = reference,
portal_type ='Ingestion Policy',
......@@ -89,12 +102,24 @@ class TestDataIngestion(SecurityTestCase):
self.tic()
return
def getDataIngestion(self, reference):
  """Look up a 'Data Ingestion' in the portal catalog by ``reference``.

  Returns whatever ``portal_catalog.getResultValue`` yields for that
  portal type and reference; callers in this file compare it against None.
  """
  catalog = self.portal.portal_catalog
  return catalog.getResultValue(portal_type='Data Ingestion',
                                reference=reference)
def getDataStream(self, reference):
  """Look up a 'Data Stream' in the portal catalog by ``reference``.

  Returns whatever ``portal_catalog.getResultValue`` yields for that
  portal type and reference; callers in this file compare it against None.
  """
  catalog = self.portal.portal_catalog
  return catalog.getResultValue(portal_type='Data Stream',
                                reference=reference)
def getDataAnalysis(self, reference):
  """Look up a 'Data Analysis' in the portal catalog by ``reference``.

  Returns whatever ``portal_catalog.getResultValue`` yields for that
  portal type and reference; callers in this file compare it against None.
  """
  catalog = self.portal.portal_catalog
  return catalog.getResultValue(portal_type='Data Analysis',
                                reference=reference)
def getDataArray(self, reference):
data_array = self.portal.portal_catalog.getResultValue(
portal_type = 'Data Array',
......@@ -102,11 +127,21 @@ class TestDataIngestion(SecurityTestCase):
return data_array
# Find the data descriptor associated with `reference`.
# NOTE(review): two implementations appear back to back here, with no
# indentation and no diff markers. Presumably the first (ending in
# `return None`) is the removed side of the diff hunk and the second is its
# replacement -- confirm against the real file before editing.
def getDataDescriptor(self, reference):
# First implementation: walk the 'Data Descriptor' catalog results, sorted by
# id (as int, descending), and return the first document whose reference
# equals `reference`; otherwise return None.
query = Query(portal_type="Data Descriptor")
for document in self.portal.portal_catalog(query=query, sort_on=(('id', 'DESC', 'int'),)):
if document.reference == reference:
return document
return None
# Second implementation: search 'Data Ingestion' documents with this
# reference whose simulation_state is 'stopped' OR 'delivered'.
data_ingestion = None
query = ComplexQuery(Query(simulation_state='stopped'),
Query(simulation_state='delivered'),
logical_operator="OR")
ing_dict = {
"query": query,
"portal_type": "Data Ingestion",
"reference": reference}
ingestions = self.portal.portal_catalog(**ing_dict)
# Only an unambiguous match (exactly one result) is accepted.
if len(ingestions) == 1:
data_ingestion = ingestions[0]
if data_ingestion == None: return None
# The descriptor is fetched from data_descriptor_module under the same id
# as the matched ingestion.
url = 'data_descriptor_module/' + data_ingestion.getId()
data_descriptor = self.context.restrictedTraverse(url)
return data_descriptor
def manuallyStopIngestionWorkaround(self, reference, now_time):
try:
......@@ -143,19 +178,49 @@ class TestDataIngestion(SecurityTestCase):
return ingestion_reference
def checkDataObjects(self, ingestion_reference, data_chunk, array, json_data):
  """Check the objects stored for ``ingestion_reference``.

  Delegates to ``checkOperation`` with ``None`` as the previous reference,
  so ``ingestion_reference`` is passed as the operation reference.

  ingestion_reference -- reference of the ingestion to check.
  data_chunk -- raw content passed on for comparison.
  array -- expected array content, or None.
  json_data -- expected descriptor text content.
  """
  # The former local ``data_stream`` lookup was never used; it only cost an
  # extra catalog query, so it is dropped.
  self.checkOperation(None, ingestion_reference, data_chunk, array, json_data)
# Assert the catalog state after an operation on an ingestion.
#   ingestion_reference -- reference before the operation, or None when no
#                          previous reference has to be checked
#   operation_reference -- reference the objects are looked up under afterwards
#   data_chunk, array, json_data -- expected stream data, array and
#                          descriptor text content
# NOTE(review): indentation and diff markers are lost in this view, so the
# extent of the `if` blocks below cannot be read from the text, and some
# lines look like removed/added pairs of the same statement -- confirm
# against the real file.
def checkOperation(self, ingestion_reference, operation_reference, data_chunk, array, json_data):
# When a previous reference is given, lookups under it are asserted to
# return None.
if ingestion_reference != None:
data_ingestion = self.getDataIngestion(ingestion_reference)
self.assertEqual(data_ingestion, None)
data_analysis = self.getDataAnalysis(operation_reference)
self.assertNotEqual(data_analysis, None)
data_analysis = self.getDataAnalysis(ingestion_reference)
self.assertEqual(data_analysis, None)
data_stream = self.getDataStream(ingestion_reference)
self.assertEqual(data_stream, None)
data_array = self.getDataArray(ingestion_reference)
self.assertEqual(data_array, None)
data_descriptor = self.getDataDescriptor(ingestion_reference)
self.assertEqual(data_descriptor, None)
# The ingestion under the operation reference must be in state "delivered".
data_ingestion = self.getDataIngestion(operation_reference)
self.assertEqual(data_ingestion.getSimulationState(), "delivered")
# Expected size and hash are computed from the chunk and compared with the
# stream's getSize() and getVersion().
size, hash_value = self.context.generateSizeHash(data_chunk)
data_stream = self.getDataStream(operation_reference)
self.assertEqual(len(data_chunk), len(data_stream.getData()))
self.assertEqual(size, data_stream.getSize())
self.assertEqual(hash_value, data_stream.getVersion())
self.assertEqual(data_chunk, data_stream.getData())
# NOTE(review): the next two lines assign data_array twice in a row;
# presumably the first is the removed diff line -- confirm.
data_array = self.getDataArray(ingestion_reference)
data_array = self.getDataArray(operation_reference)
if array is None:
self.assertEqual(array, data_array.getArray())
else:
np.testing.assert_allclose(array, data_array.getArray()[:])
self.assertTrue(np.allclose(array, data_array.getArray()[:]))
# NOTE(review): the descriptor check appears twice, once unconditionally on
# ingestion_reference and once guarded on operation_reference; presumably
# the unconditional pair is the removed diff side -- confirm.
data_descriptor = self.getDataDescriptor(ingestion_reference)
self.assertEqual(json_data, data_descriptor.getTextContent())
if ingestion_reference == None:
data_descriptor = self.getDataDescriptor(operation_reference)
self.assertEqual(json_data, data_descriptor.getTextContent())
def perform_csv_test(self, extension, delimiter):
file_name = "file_name.csv"
......@@ -280,3 +345,72 @@ class TestDataIngestion(SecurityTestCase):
data_stream = self.getDataStream(ingestion_reference)
self.assertEqual(len(data_chunk), len(data_stream.getData()))
self.assertEqual(data_chunk, data_stream.getData())
def test_deletion(self):
  """Ingest a text chunk, invalidate it, then check the invalidated objects.

  After ERP5Site_invalidateIngestionObjects runs, the objects are checked
  under the original reference suffixed with ``self.INVALID``.
  """
  alphabet = string.ascii_letters + string.digits
  chunk_length = self.CHUNK_SIZE_TXT + 1000
  reference = self.getRandomReference()
  data_chunk = ''.join(random.choice(alphabet) for _ in xrange(chunk_length))
  ingestion_reference = self.ingest(data_chunk, reference, self.TXT)
  sample = data_chunk[:self.CHUNK_SIZE_TXT]
  json_data = json.dumps({"File content sample: ": sample})
  self.checkDataObjects(ingestion_reference, data_chunk, None, json_data)

  self.portal.ERP5Site_invalidateIngestionObjects(ingestion_reference)
  self.tic()
  self.checkOperation(ingestion_reference,
                      ingestion_reference + self.INVALID,
                      data_chunk, None, json_data)
def test_rename(self):
  """Ingest a text chunk, rename the ingestion, then check the renamed objects.

  The new reference is the original one suffixed with ``self.NEW``.
  """
  alphabet = string.ascii_letters + string.digits
  chunk_length = self.CHUNK_SIZE_TXT + 1000
  reference = self.getRandomReference()
  data_chunk = ''.join(random.choice(alphabet) for _ in xrange(chunk_length))
  ingestion_reference = self.ingest(data_chunk, reference, self.TXT)
  sample = data_chunk[:self.CHUNK_SIZE_TXT]
  json_data = json.dumps({"File content sample: ": sample})
  self.checkDataObjects(ingestion_reference, data_chunk, None, json_data)

  renamed_reference = ingestion_reference + self.NEW
  self.portal.ERP5Site_renameIngestion(ingestion_reference, renamed_reference)
  self.tic()
  self.checkOperation(ingestion_reference, renamed_reference,
                      data_chunk, None, json_data)
def test_reingestion(self):
  """Ingest a chunk, then ingest new content under the same reference.

  The data objects are checked after each of the two ingestions.
  """
  alphabet = string.ascii_letters + string.digits
  chunk_length = self.CHUNK_SIZE_TXT + 1000
  reference = self.getRandomReference()

  # First ingestion.
  data_chunk = ''.join(random.choice(alphabet) for _ in xrange(chunk_length))
  ingestion_reference = self.ingest(data_chunk, reference, self.TXT)
  json_data = json.dumps(
    {"File content sample: ": data_chunk[:self.CHUNK_SIZE_TXT]})
  self.checkDataObjects(ingestion_reference, data_chunk, None, json_data)

  # Second ingestion: fresh random content, same reference.
  new_data_chunk = ''.join(random.choice(alphabet) for _ in xrange(chunk_length))
  new_json_data = json.dumps(
    {"File content sample: ": new_data_chunk[:self.CHUNK_SIZE_TXT]})
  log("Reingesting existing reference")
  ingestion_reference = self.ingest(new_data_chunk, reference, self.TXT)
  self.checkDataObjects(ingestion_reference, new_data_chunk, None, new_json_data)
def test_reference_exists(self):
  """Ingest a chunk and check ingestionReferenceExists reports ``self.TRUE``.

  The script is called with the full reference built by getFullReference.
  """
  alphabet = string.ascii_letters + string.digits
  chunk_length = self.CHUNK_SIZE_TXT + 1000
  reference = self.getRandomReference()
  data_chunk = ''.join(random.choice(alphabet) for _ in xrange(chunk_length))
  ingestion_reference = self.ingest(data_chunk, reference, self.TXT)
  sample = data_chunk[:self.CHUNK_SIZE_TXT]
  json_data = json.dumps({"File content sample: ": sample})
  self.checkDataObjects(ingestion_reference, data_chunk, None, json_data)

  size, hash_value = self.context.generateSizeHash(data_chunk)
  full_reference = self.getFullReference(ingestion_reference, size, hash_value)
  self.assertEqual(self.portal.ingestionReferenceExists(full_reference),
                   self.TRUE)
def test_descriptor_html_content_script(self):
  """Ingest a chunk and check getDescriptorHTMLContent returns its JSON sample.

  The script output is logged and then compared with the expected
  descriptor JSON.
  """
  alphabet = string.ascii_letters + string.digits
  chunk_length = self.CHUNK_SIZE_TXT + 1000
  reference = self.getRandomReference()
  data_chunk = ''.join(random.choice(alphabet) for _ in xrange(chunk_length))
  ingestion_reference = self.ingest(data_chunk, reference, self.TXT)
  sample = data_chunk[:self.CHUNK_SIZE_TXT]
  json_data = json.dumps({"File content sample: ": sample})
  self.checkDataObjects(ingestion_reference, data_chunk, None, json_data)

  script_content = self.portal.getDescriptorHTMLContent(ingestion_reference)
  for message in ("script_content:", script_content):
    log(message)
  self.assertEqual(script_content, json_data)
# TODOs
#def test_object_invalidation(self):
#def test_deletion_rename_and_reingestion_on_split_ingestion(self):
#def test_descriptor_html_content_script_on_middle_of_ingestion(self):
#def test_usual_features_on_middle_of_ingestion(self):
......@@ -46,18 +46,18 @@
<key> <string>text_content_warning_message</string> </key>
<value>
<tuple>
<string>W:140, 4: Unused variable \'ingestion_id\' (unused-variable)</string>
<string>W:163, 34: Unused variable \'i\' (unused-variable)</string>
<string>W:163, 76: Unused variable \'j\' (unused-variable)</string>
<string>W:186, 4: Redefining name \'np\' from outer scope (line 9) (redefined-outer-name)</string>
<string>W:186, 4: Reimport \'numpy\' (imported line 9) (reimported)</string>
<string>W:202, 11: Using type() instead of isinstance() for a typecheck. (unidiomatic-typecheck)</string>
<string>W:206, 10: No exception type(s) specified (bare-except)</string>
<string>W:214, 26: Unused variable \'e\' (unused-variable)</string>
<string>W:279, 4: Unused variable \'ingestion_id\' (unused-variable)</string>
<string>W:175, 4: Unused variable \'ingestion_id\' (unused-variable)</string>
<string>W:228, 34: Unused variable \'i\' (unused-variable)</string>
<string>W:228, 76: Unused variable \'j\' (unused-variable)</string>
<string>W:251, 4: Redefining name \'np\' from outer scope (line 9) (redefined-outer-name)</string>
<string>W:251, 4: Reimport \'numpy\' (imported line 9) (reimported)</string>
<string>W:267, 11: Using type() instead of isinstance() for a typecheck. (unidiomatic-typecheck)</string>
<string>W:271, 10: No exception type(s) specified (bare-except)</string>
<string>W:279, 26: Unused variable \'e\' (unused-variable)</string>
<string>W:344, 4: Unused variable \'ingestion_id\' (unused-variable)</string>
<string>W: 8, 0: Unused timedelta imported from datetime (unused-import)</string>
<string>W: 10, 0: Unused import math (unused-import)</string>
<string>W: 13, 0: Unused log imported from Products.ERP5Type.Log (unused-import)</string>
<string>W: 14, 0: Unused import hashlib (unused-import)</string>
</tuple>
</value>
</item>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment