Commit 72a0730d authored by Ivan Tyagov's avatar Ivan Tyagov

Use proper script to get all Data Streams for a Data Set rather than rely on reference. Test it.

parent eb51cb86
......@@ -4,7 +4,7 @@
Note: This code is quite computationally costly (for Data Streams having thousands of files) as it needs to:
1. Query MariaDB to find ingestion lines
2. Read from ZODB both Data Ingestion Lines and Data Streams (whoch itself can be big too)
2. Read from ZODB both Data Ingestion Lines and Data Streams (which itself can be big too)
"""
data_ingestion_line_list = context.portal_catalog(
portal_type = "Data Ingestion Line",
......
"""
This script is called from ebulk client to get list of Data Streams for a
Data set.
"""
import re
import json
from Products.ERP5Type.Log import log
......@@ -20,14 +25,8 @@ data_set = portal.data_set_module.get(data_set_reference)
if data_set is None:
return []
# XXX: use DataSet_getDataStreamList instead!
query_dict = {
"portal_type": "Data Stream",
"reference": data_set.getReference() + reference_separator + "%"}
data_stream_list = []
for stream in portal_catalog(**query_dict):
for stream in data_set.DataSet_getDataStreamList():
if stream.getVersion() == "":
return { "status_code": 2, "result": [] }
data_stream_list.append({ 'id': 'data_stream_module/'+stream.getId(),
......
......@@ -84,7 +84,6 @@ class TestDataIngestion(SecurityTestCase):
def ingest(self, data_chunk, reference, extension, eof, randomize_ingestion_reference=False):
ingestion_reference = self.getIngestionReference(reference, extension, randomize_ingestion_reference)
self.portal.log(ingestion_reference)
# use default ebulk policy
ingestion_policy = self.portal.portal_ingestion_policies.wendelin_embulk
......@@ -197,6 +196,9 @@ class TestDataIngestion(SecurityTestCase):
"""
data_set, data_stream_list = self.stepIngest(self.CSV, ",", randomize_ingestion_reference=True)
self.tic()
# check data relation between Data Set and Data Streams work
self.assertSameSet(data_stream_list, data_set.DataSet_getDataStreamList())
  # publish data set and have all Data Streams published automatically
data_set.publish()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment