Commit 545ba8c9 authored by Roque Porchetto

erp5_wendelin_telecom_ingestion: reduce one step in ingestion cycle (started-stopped-delivered)

parent 54e7a310
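This commit drops the initial 'planned' step of the ingestion cycle: Data Ingestions are now started as soon as they are created, stopped by alarm, and delivered (rather than cancelled) when invalidated. A minimal sketch of the shortened state cycle, using an illustrative helper that is not part of the codebase:

# Illustration only: the simulation-state cycle before and after this commit.
OLD_CYCLE = ('planned', 'started', 'stopped', 'delivered')
NEW_CYCLE = ('started', 'stopped', 'delivered')

def next_state(current, cycle=NEW_CYCLE):
  # Return the next simulation state, or None once the cycle is complete.
  index = cycle.index(current)
  return cycle[index + 1] if index + 1 < len(cycle) else None

assert next_state('started') == 'stopped'
assert next_state('planned', cycle=OLD_CYCLE) == 'started'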
from Products.ERP5Type.Log import log
portal = context.getPortalObject()
portal.ERP5Site_stopIngestionList()
-portal.ERP5Site_startIngestionList()
portal.ERP5Site_createDataAnalysisList()
portal.ERP5Site_executeDataAnalysisList()
@@ -10,12 +10,12 @@ now = DateTime()
now_minus_max = now - 1.0/24/60*9999
now_minus_4 = now - 1.0/24/60*4
-catalog_kw = {'creation_date': {'query': (now_minus_max, now_minus_4), 'range': 'minmax'}, 'simulation_state': 'planned', 'portal_type': 'Data Ingestion'}
+catalog_kw = {'creation_date': {'query': (now_minus_max, now_minus_4), 'range': 'minmax'}, 'simulation_state': 'started', 'portal_type': 'Data Ingestion'}
for data_ingestion in portal_catalog(**catalog_kw):
-# search related data ingestions that are not old yet (less than 10 min)
+# search related data ingestions that are not old yet (less than 4 min)
catalog_kw = {'creation_date': {'query': (now_minus_4, DateTime()), 'range': 'minmax'},
-'simulation_state': 'planned',
+'simulation_state': 'started',
'portal_type': 'Data Ingestion',
'reference': data_ingestion.getReference()}
invalidate = True
@@ -36,8 +36,9 @@ for data_ingestion in portal_catalog(**catalog_kw):
context.logEntry("[WARNING] Could not invalidate data stream '%s', it was already invalidated or draft" % data_stream.getId())
context.logEntry("%s %s (id:%s) invalidated" % (data_stream.getPortalType(), data_stream.getReference(), data_stream.getId()))
try:
-data_ingestion.setReference(data_ingestion.getReference() + "_invalid")
-data_ingestion.cancel()
+if not data_ingestion.getReference().endswith("_invalid"):
+  data_ingestion.setReference(data_ingestion.getReference() + "_invalid")
+data_ingestion.deliver()
except:
context.logEntry("[WARNING] Could not invalidate/cancel data ingestion '%s', it was already invalidated/cancelled" % data_ingestion.getId())
context.logEntry("%s %s (id:%s) invalidated and cancelled" % (data_ingestion.getPortalType(), data_ingestion.getReference(), data_ingestion.getId()))
context.logEntry("[WARNING] Could not invalidate/deliver data ingestion '%s', it was already invalidated/deliver" % data_ingestion.getId())
context.logEntry("%s %s (id:%s) invalidated and delivered" % (data_ingestion.getPortalType(), data_ingestion.getReference(), data_ingestion.getId()))
from Products.ERP5Type.Log import log
from Products.ZSQLCatalog.SQLCatalog import Query, SimpleQuery
portal = context.getPortalObject()
portal_catalog = portal.portal_catalog
# start single planned ingestion (not split files)
for data_ingestion in portal_catalog(portal_type = "Data Ingestion",
simulation_state = "planned",
id = "%EOF"):
related_split_ingestions = portal_catalog(portal_type = "Data Ingestion",
reference = data_ingestion.getReference())
if len(related_split_ingestions) == 1:
context.logEntry("Planned EOF ingestion found: " + data_ingestion.getId())
data_stream = portal_catalog.getResultValue(
portal_type = 'Data Stream',
reference = data_ingestion.getReference())
if data_stream is not None:
data_stream.validate()
data_ingestion.start()
context.logEntry("Data Ingestion %s started." % data_ingestion.getId())
# append split ingestions
for data_ingestion in portal_catalog(portal_type = "Data Ingestion",
simulation_state = "planned",
id = "%001"):
context.logEntry("Planned split ingestion found: " + data_ingestion.getId())
try:
last_data_stream_id = ""
query = Query(portal_type="Data Stream", reference=data_ingestion.getReference())
result_list = portal_catalog(query=query, sort_on=(('creation_date', 'ascending'),))
for data_stream in result_list:
if data_stream.getId() == data_ingestion.getId():
full_data_stream = data_stream
else:
full_data_stream.appendData(data_stream.getData())
last_data_stream_id = data_stream.getId()
portal.data_stream_module.deleteContent(data_stream.getId())
if last_data_stream_id.endswith("EOF"):
full_data_stream.validate()
related_split_ingestions = portal_catalog(portal_type = "Data Ingestion",
reference = data_ingestion.getReference())
for ingestion in related_split_ingestions:
if ingestion.getId() == full_data_stream.getId():
ingestion.start()
else:
ingestion.cancel()
context.logEntry("Chunks of split ingestion where appended into Data Stream %s. Corresponding Data Ingestion started." % full_data_stream.getId())
except Exception as e:
context.logEntry("ERROR appending split data streams for ingestion: %s" % data_ingestion.getReference())
context.logEntry(e)
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="PythonScript" module="Products.PythonScripts.PythonScript"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>Script_magic</string> </key>
<value> <int>3</int> </value>
</item>
<item>
<key> <string>_bind_names</string> </key>
<value>
<object>
<klass>
<global name="NameAssignments" module="Shared.DC.Scripts.Bindings"/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key> <string>_asgns</string> </key>
<value>
<dictionary>
<item>
<key> <string>name_container</string> </key>
<value> <string>container</string> </value>
</item>
<item>
<key> <string>name_context</string> </key>
<value> <string>context</string> </value>
</item>
<item>
<key> <string>name_m_self</string> </key>
<value> <string>script</string> </value>
</item>
<item>
<key> <string>name_subpath</string> </key>
<value> <string>traverse_subpath</string> </value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key> <string>_params</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>ERP5Site_startIngestionList</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
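Both the deleted start script above and the reworked stop script below assemble split uploads the same way: chunk Data Streams share the ingestion reference, are sorted by creation_date, the first chunk (id ending in 001) becomes the target stream, and an id ending in EOF marks completion. A simplified sketch with in-memory pairs standing in for Data Stream objects (names are illustrative):

# Simplified model: (id, data) pairs already sorted by creation date.
chunks = [('X-001', b'aaa'), ('X-002', b'bbb'), ('X-EOF', b'ccc')]

full_id, full_data = chunks[0]   # the 001 chunk plays the role of full_data_stream
last_id = ''
for chunk_id, data in chunks[1:]:
  full_data += data              # appendData() equivalent
  last_id = chunk_id             # the appended chunk would then be deleted

# Only a terminating EOF chunk completes and validates the assembled stream.
if last_id.endswith('EOF'):
  assert full_data == b'aaabbbccc'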
-from DateTime import DateTime
-from Products.ERP5Type.DateUtils import addToDate
-from Products.ERP5Type.Log import log
-from Products.ZSQLCatalog.SQLCatalog import Query, SimpleQuery
-portal_catalog = context.getPortalObject().portal_catalog
-# search for dates older than one minute ago
-old_start_date = addToDate(DateTime(), {'minute' : -1})
-start_date_query = Query(**{'delivery.start_date': old_start_date, 'range': 'ngt'})
-kw_dict = {"query": start_date_query,
-           "portal_type": "Data Ingestion",
-           "simulation_state": "started"}
-parent_uid_list = [x.getUid() for x in portal_catalog(**kw_dict)]
-if len(parent_uid_list) != 0:
-  log("Stopping %s ingestions..." %(str(len(parent_uid_list))))
-  context.logEntry("[Called by alarm] Stopping %s ingestions" %(str(len(parent_uid_list))))
-  kw_dict = {"portal_type": "Data Ingestion Line",
-             "resource_portal_type": "Data Product",
-             "parent_uid": parent_uid_list}
-  for data_ingestion_line in portal_catalog(**kw_dict):
-    try:
-      data_ingestion = data_ingestion_line.getParentValue()
-      resource = data_ingestion_line.getResourceValue()
-      log("data_ingestion: " + str(data_ingestion.getReference()))
-      log("resource: " + str(resource.getReference()))
-      if "big_data/ingestion/batch_ingestion" in resource.getUseList():
-        if data_ingestion.getSimulationState() == 'started':
-          data_ingestion.setStopDate(DateTime())
-          data_ingestion.stop()
-          log("DATA INGESTION STOPPED")
-          context.logEntry("Data Ingestion '%s' stopped." % data_ingestion.getId())
-        else:
-          context.logEntry("[WARNING] Could not stop Data Ingestion '%s' because it is already stopped." % data_ingestion.getId())
-      else:
-        raise ValueError("resource.getUseList must be 'big_data/ingestion/batch_ingestion'")
-    except Exception as e:
-      context.logEntry("[ERROR] Error stopping Data Ingestion '%s': %s." % (data_ingestion.getId(), str(e)))
+from Products.ERP5Type.Log import log
+from Products.ZSQLCatalog.SQLCatalog import Query, SimpleQuery
+portal = context.getPortalObject()
+portal_catalog = portal.portal_catalog
+# stop single started ingestion (not split files)
+for data_ingestion in portal_catalog(portal_type = "Data Ingestion",
+                                     simulation_state = "started",
+                                     id = "%EOF"):
+  if not data_ingestion.getReference().endswith("_invalid"):
+    related_split_ingestions = portal_catalog(portal_type = "Data Ingestion",
+                                              reference = data_ingestion.getReference())
+    if len(related_split_ingestions) == 1:
+      context.logEntry("Started EOF ingestion found: " + data_ingestion.getId())
+      data_stream = portal_catalog.getResultValue(
+        portal_type = 'Data Stream',
+        reference = data_ingestion.getReference())
+      if data_stream is not None:
+        #calculate md5
+        data_stream.validate()
+        data_ingestion.stop()
+        context.logEntry("Data Ingestion %s stopped." % data_ingestion.getId())
+# append split ingestions
+for data_ingestion in portal_catalog(portal_type = "Data Ingestion",
+                                     simulation_state = "started",
+                                     id = "%001"):
+  if not data_ingestion.getReference().endswith("_invalid"):
+    context.logEntry("Started split ingestion found: " + data_ingestion.getId())
+    try:
+      last_data_stream_id = ""
+      query = Query(portal_type="Data Stream", reference=data_ingestion.getReference())
+      result_list = portal_catalog(query=query, sort_on=(('creation_date', 'ascending'),))
+      for data_stream in result_list:
+        if data_stream.getId() == data_ingestion.getId():
+          full_data_stream = data_stream
+        else:
+          full_data_stream.appendData(data_stream.getData())
+          last_data_stream_id = data_stream.getId()
+          portal.data_stream_module.deleteContent(data_stream.getId())
+      if last_data_stream_id.endswith("EOF"):
+        #calculate md5
+        full_data_stream.validate()
+        related_split_ingestions = portal_catalog(portal_type = "Data Ingestion",
+                                                  reference = data_ingestion.getReference())
+        for ingestion in related_split_ingestions:
+          if ingestion.getId() == full_data_stream.getId():
+            ingestion.stop()
+          else:
+            ingestion.setReference(ingestion.getReference() + "_invalid")
+            ingestion.deliver()
+        context.logEntry("Chunks of split ingestion were appended into Data Stream %s. Corresponding Data Ingestion stopped." % full_data_stream.getId())
+    except Exception as e:
+      context.logEntry("ERROR appending split data streams for ingestion: %s" % data_ingestion.getReference())
+      context.logEntry(e)
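The endswith guard introduced in this commit makes the _invalid rename idempotent when the alarm revisits an ingestion it has already marked; a minimal sketch:

def mark_invalid(reference):
  # Suffix the reference only once, so repeated alarm runs are harmless.
  if not reference.endswith('_invalid'):
    reference += '_invalid'
  return reference

assert mark_invalid('ref-001') == 'ref-001_invalid'
assert mark_invalid('ref-001_invalid') == 'ref-001_invalid'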
@@ -28,7 +28,7 @@ try:
reference = data_ingestion_reference)
if data_ingestion is not None:
-if data_ingestion.getSimulationState() != 'planned':
+if data_ingestion.getSimulationState() != 'started':
context.logEntry("An older ingestion for reference %s was already done." % data_ingestion_reference)
raise ValueError("An older ingestion for reference %s was already done." % data_ingestion_reference)
@@ -120,8 +120,8 @@ try:
data_set.validate()
input_line.setDefaultAggregateValue(data_set)
-data_ingestion.plan()
-context.logEntry("Data Ingestion planned.")
+data_ingestion.start()
+context.logEntry("Data Ingestion started.")
data_operation = operation_line.getResourceValue()
data_stream = input_line.getAggregateDataStreamValue()
@@ -28,11 +28,13 @@ try:
if data_ingestion is None:
return FALSE
-if data_ingestion.getSimulationState() == 'planned':
+if data_ingestion.getSimulationState() == 'started':
try:
if EOF != "EOF" and int(EOF) == 1:
-# The user has restarted a split ingestion that is already being processed
-return TRUE
+# The user has restarted an interrupted split ingestion
+context.log("[WARNING] User has restarted an interrupted ingestion for reference %s." % data_ingestion.getReference())
+context.log("[WARNING] Previous split ingestions for reference %s will be discarded and full ingestion restarted." % data_ingestion.getReference())
+portal.ERP5Site_invalidateIngestionObjects(data_ingestion.getReference())
except:
pass
return FALSE
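In the hunk above, EOF appears to carry the chunk suffix of the incoming request: the literal string "EOF" for the final chunk and a chunk number otherwise, so a first chunk arriving while the ingestion is already 'started' signals that the user restarted the upload. A sketch of the predicate under that reading (the function name is hypothetical):

def is_user_restart(EOF, simulation_state):
  # A repeated first chunk on an already-started ingestion means the
  # client began the whole upload again from scratch.
  if simulation_state != 'started':
    return False
  try:
    return EOF != "EOF" and int(EOF) == 1
  except ValueError:
    return False

assert is_user_restart("1", 'started')
assert not is_user_restart("EOF", 'started')
assert not is_user_restart("2", 'started')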
@@ -118,11 +118,8 @@ class TestDataIngestion(SecurityTestCase):
raise StandardError("Could not find ingestion with reference %s" % reference)
def simulateIngestionAlarm(self, reference, now):
-self.portal.ERP5Site_startIngestionList()
-self.tic()
self.portal.ERP5Site_stopIngestionList()
self.tic()
-self.manuallyStopIngestionWorkaround(reference, now)
self.portal.ERP5Site_createDataAnalysisList()
time.sleep(5)
@@ -275,7 +272,7 @@ class TestDataIngestion(SecurityTestCase):
self.ingestRequest('POST', (self.USER, self.PASS), ingestion_reference, self.PART_3, data_chunk_3, ingestion_policy)
time.sleep(1)
self.ingestRequest('POST', (self.USER, self.PASS), ingestion_reference, self.EOF, data_chunk_4, ingestion_policy)
-self.portal.ERP5Site_startIngestionList()
+self.portal.ERP5Site_stopIngestionList()
self.tic()
ingestion_id, ingestion_reference = self.sanitizeReference(ingestion_reference)
data_stream = self.getDataStream(ingestion_reference)
@@ -46,15 +46,15 @@
<key> <string>text_content_warning_message</string> </key>
<value>
<tuple>
-<string>W:141, 4: Unused variable \'ingestion_id\' (unused-variable)</string>
-<string>W:164, 34: Unused variable \'i\' (unused-variable)</string>
-<string>W:164, 76: Unused variable \'j\' (unused-variable)</string>
-<string>W:187, 4: Redefining name \'np\' from outer scope (line 9) (redefined-outer-name)</string>
-<string>W:187, 4: Reimport \'numpy\' (imported line 9) (reimported)</string>
-<string>W:203, 11: Using type() instead of isinstance() for a typecheck. (unidiomatic-typecheck)</string>
-<string>W:207, 10: No exception type(s) specified (bare-except)</string>
-<string>W:215, 26: Unused variable \'e\' (unused-variable)</string>
-<string>W:280, 4: Unused variable \'ingestion_id\' (unused-variable)</string>
+<string>W:138, 4: Unused variable \'ingestion_id\' (unused-variable)</string>
+<string>W:161, 34: Unused variable \'i\' (unused-variable)</string>
+<string>W:161, 76: Unused variable \'j\' (unused-variable)</string>
+<string>W:184, 4: Redefining name \'np\' from outer scope (line 9) (redefined-outer-name)</string>
+<string>W:184, 4: Reimport \'numpy\' (imported line 9) (reimported)</string>
+<string>W:200, 11: Using type() instead of isinstance() for a typecheck. (unidiomatic-typecheck)</string>
+<string>W:204, 10: No exception type(s) specified (bare-except)</string>
+<string>W:212, 26: Unused variable \'e\' (unused-variable)</string>
+<string>W:277, 4: Unused variable \'ingestion_id\' (unused-variable)</string>
<string>W: 8, 0: Unused timedelta imported from datetime (unused-import)</string>
<string>W: 10, 0: Unused import math (unused-import)</string>
<string>W: 13, 0: Unused log imported from Products.ERP5Type.Log (unused-import)</string>