Commit a3eedef3 authored by Roque Porchetto's avatar Roque Porchetto

erp5_wendelin_telecom_ingestion: metadata of ingested files

- metadata is stored as JSON
- data operation deals with the metadata of each file type
- minor fix: setting title in data streams
parent 7a267634
......@@ -4,6 +4,7 @@ import json
from DateTime import DateTime
from mne.report import Report
from Products.ERP5Type.Log import log
from lxml.html import parse
CHUNK_SIZE = 200000
......@@ -25,7 +26,7 @@ def saveRawFile(data_stream, file_name):
n_chunk += 1
log("Done.")
def getHTMLReport(file_name, raw):
def getJSONReport(file_name, raw):
try:
pattern = file_name + "_raw.fif"
report_file = file_name + 'report.html'
......@@ -33,11 +34,16 @@ def getHTMLReport(file_name, raw):
report = Report(verbose=True)
report.parse_folder(data_path="./", pattern=[pattern])
report.save(report_file, overwrite=True, open_browser=False)
with open(report_file, 'r') as report:
content = report.read()
return content
data = {}
doc = parse(report_file)
results = doc.xpath("//table[@class = 'table table-hover']")
rows = iter(results[0])
for row in rows:
data[row[0].text] = row[1].text
json_data = json.dumps(data)
return json_data
except Exception as e:
log("Error while getting HTML Report: " + str(e))
log("Error while getting JSON Report: " + str(e))
return ""
finally:
if os.path.exists(pattern):
......@@ -45,14 +51,6 @@ def getHTMLReport(file_name, raw):
if os.path.exists(report_file):
os.remove(report_file)
def getJSONMetadata(raw_info):
info = { 'filename': 'FILENAME', #raw_info['filename'],
'nchan': raw_info['nchan'],
'ch_names': raw_info['ch_names'],
'sfreq': raw_info['sfreq']
}
return json.dumps(info)
def getRawData(file_name):
raw = None
try:
......@@ -69,20 +67,20 @@ def getRawData(file_name):
def processFifData(file_name, data_array, data_descriptor):
raw = getRawData(file_name)
try:
html_report = getHTMLReport(file_name, raw)
data_descriptor.setTextContent(html_report)
json_report = getJSONReport(file_name, raw)
data_descriptor.setTextContent(json_report)
log("Data Descriptor content saved")
except Exception as e:
log("Error handling Data Descriptor content: " + str(e))
log("Saving raw data in Data Array...")
picks = mne.pick_types(raw.info)
if len(picks) == 0: raise StandardError("The raw data does not contain any element")
data, times = raw[picks[:1]] # get data from first pick to get shape
data, times = raw[picks[:1]] # get data from first pick to get shape
dtype = data.dtype
data_array.initArray(data.shape, dtype)
zarray = data_array.getArray()
zarray[0] = data[0]
......@@ -106,7 +104,7 @@ def processCsvData(file_name, data_array, data_descriptor):
def processRawData(data_stream, data_array, data_descriptor, reference_extension):
import time
start = time.time()
file_name = "temporal_file_%s" % DateTime().strftime('%Y%m%d-%H%M%S')
try:
saveRawFile(data_stream, file_name)
......
......@@ -52,9 +52,9 @@
<key> <string>text_content_warning_message</string> </key>
<value>
<tuple>
<string>W: 60, 2: No exception type(s) specified (bare-except)</string>
<string>W: 65, 4: No exception type(s) specified (bare-except)</string>
<string>W: 83, 8: Unused variable \'times\' (unused-variable)</string>
<string>W: 58, 2: No exception type(s) specified (bare-except)</string>
<string>W: 63, 4: No exception type(s) specified (bare-except)</string>
<string>W: 81, 8: Unused variable \'times\' (unused-variable)</string>
</tuple>
</value>
</item>
......
......@@ -34,7 +34,7 @@ for data_array in portal_catalog(**kw_dict):
# invalidate old (more than 10min) pending ingestions (split ingestions that were canceled/interrupted)
# invalidate old (more than 10min) pending ingestions (e.g. split ingestions that were canceled/interrupted)
from DateTime import DateTime
now = DateTime()
now_minus_max = now - 1.0/24/60*9999
......
......@@ -88,6 +88,7 @@ try:
data_stream = portal.data_stream_module.newContent(
portal_type = "Data Stream",
title = data_ingestion.getTitle(),
reference = data_ingestion_reference)
data_stream.validate()
......
......@@ -4,16 +4,35 @@ from Products.ZSQLCatalog.SQLCatalog import Query, SimpleQuery
portal = context.getPortalObject()
portal_catalog = portal.portal_catalog
query = Query(portal_type="Data Descriptor")
data_ingestion = portal_catalog.getResultValue(
portal_type = 'Data Ingestion',
reference = reference)
for document in portal_catalog(query=query,
sort_on=(('id', 'DESC', 'int'),)):
if document.reference == reference:
found = True
content = document.getTextContent()
if content is not None:
return content
else:
return "No metadata available for this type of file yet"
if content is None:
return "Data descriptor not found"
try:
if data_ingestion is None or data_ingestion.getSimulationState() != 'delivered':
return '{"metadata":"Metadata not ready yet, please wait some minutes."}'
query = Query(portal_type="Data Analysis", reference=reference)
result_list = portal_catalog(query=query, sort_on=(('id', 'DESC', 'int'),))
if len(result_list) == 0:
return '{"metadata":"Metadata not ready yet, please wait some minutes."}'
data_analysis = result_list[0]
if data_analysis.getSimulationState() != 'stopped':
return '{"metadata":"Metadata not ready yet, please wait some minutes."}'
query = Query(portal_type="Data Descriptor")
content = None
for document in portal_catalog(query=query,
sort_on=(('id', 'DESC', 'int'),)):
if document.reference == reference:
content = document.getTextContent()
if content is not None:
return content
else:
return '{"metadata":"No metadata available for this type of file yet"}'
return '{"metadata":"No metadata descriptor found for this file"}'
except Exception as e:
log("Error getting data descriptor content: " + str(e))
return '{"metadata":"No metadata descriptor found for this file"}'
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment