Commit a3eedef3 authored by Roque Porchetto

erp5_wendelin_telecom_ingestion: metadata of ingested files

- metadata is stored as JSON
- the data operation deals with the metadata of each file type
- minor fix: set title in data streams

parent 7a267634
@@ -4,6 +4,7 @@ import json
 from DateTime import DateTime
 from mne.report import Report
 from Products.ERP5Type.Log import log
+from lxml.html import parse
 CHUNK_SIZE = 200000
@@ -25,7 +26,7 @@ def saveRawFile(data_stream, file_name):
     n_chunk += 1
   log("Done.")
-def getHTMLReport(file_name, raw):
+def getJSONReport(file_name, raw):
   try:
     pattern = file_name + "_raw.fif"
     report_file = file_name + 'report.html'
@@ -33,11 +34,16 @@ def getHTMLReport(file_name, raw):
     report = Report(verbose=True)
     report.parse_folder(data_path="./", pattern=[pattern])
     report.save(report_file, overwrite=True, open_browser=False)
-    with open(report_file, 'r') as report:
-      content = report.read()
-      return content
+    data = {}
+    doc = parse(report_file)
+    results = doc.xpath("//table[@class = 'table table-hover']")
+    rows = iter(results[0])
+    for row in rows:
+      data[row[0].text] = row[1].text
+    json_data = json.dumps(data)
+    return json_data
   except Exception as e:
-    log("Error while getting HTML Report: " + str(e))
+    log("Error while getting JSON Report: " + str(e))
     return ""
   finally:
     if os.path.exists(pattern):
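The rewritten getJSONReport no longer returns the report HTML verbatim: it scrapes the first `table table-hover` table of the MNE report into a dict and serializes that. A minimal standalone sketch of the same extraction, assuming a report.html whose summary table holds key/value rows as MNE emits them:

```python
import json
from lxml.html import parse

def html_table_to_json(html_path):
  # parse the generated report and grab the first key/value summary table
  doc = parse(html_path)
  table = doc.xpath("//table[@class = 'table table-hover']")[0]
  data = {}
  for row in table:
    # each row is expected to carry a key cell followed by a value cell
    data[row[0].text] = row[1].text
  return json.dumps(data)
```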
@@ -45,14 +51,6 @@ def getHTMLReport(file_name, raw):
       if os.path.exists(report_file):
         os.remove(report_file)

-def getJSONMetadata(raw_info):
-  info = { 'filename': 'FILENAME', #raw_info['filename'],
-           'nchan': raw_info['nchan'],
-           'ch_names': raw_info['ch_names'],
-           'sfreq': raw_info['sfreq']
-         }
-  return json.dumps(info)
-
 def getRawData(file_name):
   raw = None
   try:
@@ -69,20 +67,20 @@ def getRawData(file_name):
 def processFifData(file_name, data_array, data_descriptor):
   raw = getRawData(file_name)
   try:
-    html_report = getHTMLReport(file_name, raw)
-    data_descriptor.setTextContent(html_report)
+    json_report = getJSONReport(file_name, raw)
+    data_descriptor.setTextContent(json_report)
     log("Data Descriptor content saved")
   except Exception as e:
     log("Error handling Data Descriptor content: " + str(e))

   log("Saving raw data in Data Array...")
   picks = mne.pick_types(raw.info)
   if len(picks) == 0: raise StandardError("The raw data does not contain any element")

   data, times = raw[picks[:1]] # get data from first pick to get shape
   dtype = data.dtype
   data_array.initArray(data.shape, dtype)
   zarray = data_array.getArray()
   zarray[0] = data[0]
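For context, processFifData reads one picked channel just to learn the array shape and dtype before allocating the Data Array. A runnable sketch of that FIF-handling path outside ERP5 (the file name is hypothetical, and ValueError stands in for the Python 2 StandardError used above):

```python
import mne

raw = mne.io.read_raw_fif("sample_raw.fif")  # hypothetical input file
picks = mne.pick_types(raw.info)             # defaults select MEG channels
if len(picks) == 0:
  raise ValueError("The raw data does not contain any element")
data, times = raw[picks[:1]]   # first picked channel only, to learn the shape
print(data.shape, data.dtype)  # shape and dtype used to initialise the zarray
```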
@@ -106,7 +104,7 @@ def processCsvData(file_name, data_array, data_descriptor):
 def processRawData(data_stream, data_array, data_descriptor, reference_extension):
   import time
   start = time.time()
   file_name = "temporal_file_%s" % DateTime().strftime('%Y%m%d-%H%M%S')
   try:
     saveRawFile(data_stream, file_name)
...
@@ -52,9 +52,9 @@
         <key> <string>text_content_warning_message</string> </key>
         <value>
           <tuple>
-            <string>W: 60, 2: No exception type(s) specified (bare-except)</string>
-            <string>W: 65, 4: No exception type(s) specified (bare-except)</string>
-            <string>W: 83, 8: Unused variable \'times\' (unused-variable)</string>
+            <string>W: 58, 2: No exception type(s) specified (bare-except)</string>
+            <string>W: 63, 4: No exception type(s) specified (bare-except)</string>
+            <string>W: 81, 8: Unused variable \'times\' (unused-variable)</string>
           </tuple>
         </value>
       </item>
...
@@ -34,7 +34,7 @@ for data_array in portal_catalog(**kw_dict):
-# invalidate old (more than 10min) pending ingestions (split ingestions that were canceled/interrumped)
+# invalidate old (more than 10min) pending ingestions (e.g. split ingestions that were canceled/interrupted)
 from DateTime import DateTime
 now = DateTime()
 now_minus_max = now - 1.0/24/60*9999
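Zope's DateTime does arithmetic in days, so `1.0/24/60` is one minute and the cutoff above lies 9999 minutes (about 6.9 days) in the past. A tiny runnable sketch of that arithmetic, assuming only the DateTime package:

```python
from DateTime import DateTime

now = DateTime()
one_minute = 1.0 / 24 / 60        # DateTime arithmetic is expressed in days
cutoff = now - one_minute * 9999  # 9999 minutes, roughly 6.9 days, ago
print(cutoff < now)               # True: the cutoff is in the past
```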
...
@@ -88,6 +88,7 @@ try:
   data_stream = portal.data_stream_module.newContent(
     portal_type = "Data Stream",
+    title = data_ingestion.getTitle(),
     reference = data_ingestion_reference)
   data_stream.validate()
...
@@ -4,16 +4,35 @@ from Products.ZSQLCatalog.SQLCatalog import Query, SimpleQuery
 portal = context.getPortalObject()
 portal_catalog = portal.portal_catalog

-query = Query(portal_type="Data Descriptor")
-for document in portal_catalog(query=query,
-                               sort_on=(('id', 'DESC', 'int'),)):
-  if document.reference == reference:
-    found = True
-    content = document.getTextContent()
-    if content is not None:
-      return content
-    else:
-      return "No metadata available for this type of file yet"
-if content is None:
-  return "Data descriptor not found"
+data_ingestion = portal_catalog.getResultValue(
+  portal_type = 'Data Ingestion',
+  reference = reference)
+
+try:
+  if data_ingestion is None or data_ingestion.getSimulationState() != 'delivered':
+    return '{"metadata":"Metadata not ready yet, please wait some minutes."}'
+
+  query = Query(portal_type="Data Analysis", reference=reference)
+  result_list = portal_catalog(query=query, sort_on=(('id', 'DESC', 'int'),))
+  if len(result_list) == 0:
+    return '{"metadata":"Metadata not ready yet, please wait some minutes."}'
+  data_analysis = result_list[0]
+  if data_analysis.getSimulationState() != 'stopped':
+    return '{"metadata":"Metadata not ready yet, please wait some minutes."}'
+
+  query = Query(portal_type="Data Descriptor")
+  content = None
+  for document in portal_catalog(query=query,
+                                 sort_on=(('id', 'DESC', 'int'),)):
+    if document.reference == reference:
+      content = document.getTextContent()
+      if content is not None:
+        return content
+      else:
+        return '{"metadata":"No metadata available for this type of file yet"}'
+  return '{"metadata":"No metadata descriptor found for this file"}'
+except Exception as e:
+  log("Error getting data descriptor content: " + str(e))
+  return '{"metadata":"No metadata descriptor found for this file"}'