Commit 3c599bad authored by Ivan Tyagov

Fix missing data objects

See merge request !82
parents 458e9eea 4bdf9716
...@@ -10,9 +10,8 @@ ...@@ -10,9 +10,8 @@
<key> <string>categories</string> </key> <key> <string>categories</string> </key>
<value> <value>
<tuple> <tuple>
<string>resource/data_operation_module/55</string>
<string>quantity_unit/unit/piece</string>
<string>aggregate/data_acquisition_unit_module/wendelin_environment_sensor</string> <string>aggregate/data_acquisition_unit_module/wendelin_environment_sensor</string>
<string>resource/data_operation_module/resample_raw_array_all</string>
</tuple> </tuple>
</value> </value>
</item> </item>
...@@ -20,6 +19,12 @@ ...@@ -20,6 +19,12 @@
<key> <string>default_reference</string> </key> <key> <string>default_reference</string> </key>
<value> <string>data_operation</string> </value> <value> <string>data_operation</string> </value>
</item> </item>
<item>
<key> <string>description</string> </key>
<value>
<none/>
</value>
</item>
<item> <item>
<key> <string>id</string> </key> <key> <string>id</string> </key>
<value> <string>1</string> </value> <value> <string>1</string> </value>
...@@ -29,12 +34,14 @@ ...@@ -29,12 +34,14 @@
<value> <int>1</int> </value> <value> <int>1</int> </value>
</item> </item>
<item> <item>
<key> <string>portal_type</string> </key> <key> <string>language</string> </key>
<value> <string>Data Analysis Line</string> </value> <value>
<none/>
</value>
</item> </item>
<item> <item>
<key> <string>quantity</string> </key> <key> <string>portal_type</string> </key>
<value> <float>1.0</float> </value> <value> <string>Data Analysis Line</string> </value>
</item> </item>
<item> <item>
<key> <string>title</string> </key> <key> <string>title</string> </key>
...@@ -102,7 +109,7 @@ ...@@ -102,7 +109,7 @@
</item> </item>
<item> <item>
<key> <string>serial</string> </key> <key> <string>serial</string> </key>
<value> <string>989.23165.41501.63539</string> </value> <value> <string>989.57713.57849.45226</string> </value>
</item> </item>
<item> <item>
<key> <string>state</string> </key> <key> <string>state</string> </key>
...@@ -120,7 +127,7 @@ ...@@ -120,7 +127,7 @@
</tuple> </tuple>
<state> <state>
<tuple> <tuple>
<float>1610720792.52</float> <float>1612791496.77</float>
<string>UTC</string> <string>UTC</string>
</tuple> </tuple>
</state> </state>
......
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<!--
  Data Operation object with id "resample_raw_array_all"
  (reference "data-operation-resample-raw-array-all", version "001").
  Its script_id property names DataAnalysisLine_resampleRawArrayAll.
  The *_Permission keys each hold a tuple of names (presumably role names;
  confirm against the security configuration).
  workflow_history is a persistent reference to AAAAAAAAAAI= (record id="2").
-->
<pickle>
<global name="Data Operation" module="erp5.portal_type"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_Access_contents_information_Permission</string> </key>
<value>
<tuple>
<string>Assignee</string>
<string>Assignor</string>
<string>Associate</string>
<string>Auditor</string>
<string>Manager</string>
</tuple>
</value>
</item>
<item>
<key> <string>_Add_portal_content_Permission</string> </key>
<value>
<tuple>
<string>Assignee</string>
<string>Assignor</string>
<string>Associate</string>
<string>Manager</string>
</tuple>
</value>
</item>
<item>
<key> <string>_Modify_portal_content_Permission</string> </key>
<value>
<tuple>
<string>Assignee</string>
<string>Assignor</string>
<string>Associate</string>
<string>Manager</string>
</tuple>
</value>
</item>
<item>
<key> <string>_View_Permission</string> </key>
<value>
<tuple>
<string>Assignee</string>
<string>Assignor</string>
<string>Associate</string>
<string>Auditor</string>
<string>Manager</string>
</tuple>
</value>
</item>
<item>
<key> <string>categories</string> </key>
<value>
<tuple>
<string>quantity_unit/unit/piece</string>
</tuple>
</value>
</item>
<item>
<key> <string>default_reference</string> </key>
<value> <string>data-operation-resample-raw-array-all</string> </value>
</item>
<item>
<key> <string>description</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>resample_raw_array_all</string> </value>
</item>
<item>
<key> <string>language</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>portal_type</string> </key>
<value> <string>Data Operation</string> </value>
</item>
<item>
<key> <string>script_id</string> </key>
<value> <string>DataAnalysisLine_resampleRawArrayAll</string> </value>
</item>
<item>
<key> <string>title</string> </key>
<value> <string>Resample Raw Array All</string> </value>
</item>
<item>
<key> <string>version</string> </key>
<value> <string>001</string> </value>
</item>
<item>
<key> <string>workflow_history</string> </key>
<value>
<persistent> <string encoding="base64">AAAAAAAAAAI=</string> </persistent>
</value>
</item>
</dictionary>
</pickle>
</record>
<record id="2" aka="AAAAAAAAAAI=">
<!--
  PersistentMapping whose "data" dictionary has two entries:
  edit_workflow refers to AAAAAAAAAAM= (record id="3") and
  validation_workflow refers to AAAAAAAAAAQ= (record id="4").
-->
<pickle>
<global name="PersistentMapping" module="Persistence.mapping"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>data</string> </key>
<value>
<dictionary>
<item>
<key> <string>edit_workflow</string> </key>
<value>
<persistent> <string encoding="base64">AAAAAAAAAAM=</string> </persistent>
</value>
</item>
<item>
<key> <string>validation_workflow</string> </key>
<value>
<persistent> <string encoding="base64">AAAAAAAAAAQ=</string> </persistent>
</value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</pickle>
</record>
<record id="3" aka="AAAAAAAAAAM=">
<!--
  WorkflowHistoryList with a single _log entry: action "edit" by actor
  "zope", state "current", no comment, empty error_message.
  The time is a DateTime whose state tuple is (1612790970.55, "UTC").
-->
<pickle>
<global name="WorkflowHistoryList" module="Products.ERP5Type.Workflow"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_log</string> </key>
<value>
<list>
<dictionary>
<item>
<key> <string>action</string> </key>
<value> <string>edit</string> </value>
</item>
<item>
<key> <string>actor</string> </key>
<value> <string>zope</string> </value>
</item>
<item>
<key> <string>comment</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>error_message</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>serial</string> </key>
<value> <string>989.57688.26982.29934</string> </value>
</item>
<item>
<key> <string>state</string> </key>
<value> <string>current</string> </value>
</item>
<item>
<key> <string>time</string> </key>
<value>
<object>
<klass>
<global name="DateTime" module="DateTime.DateTime"/>
</klass>
<tuple>
<none/>
</tuple>
<state>
<tuple>
<float>1612790970.55</float>
<string>UTC</string>
</tuple>
</state>
</object>
</value>
</item>
</dictionary>
</list>
</value>
</item>
</dictionary>
</pickle>
</record>
<record id="4" aka="AAAAAAAAAAQ=">
<!--
  WorkflowHistoryList with a single _log entry: action "validate" by actor
  "zope", validation_state "validated", empty comment and error_message.
  The time is a DateTime whose state tuple is (1612789933.83, "UTC").
-->
<pickle>
<global name="WorkflowHistoryList" module="Products.ERP5Type.Workflow"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_log</string> </key>
<value>
<list>
<dictionary>
<item>
<key> <string>action</string> </key>
<value> <string>validate</string> </value>
</item>
<item>
<key> <string>actor</string> </key>
<value> <string>zope</string> </value>
</item>
<item>
<key> <string>comment</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>error_message</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>time</string> </key>
<value>
<object>
<klass>
<global name="DateTime" module="DateTime.DateTime"/>
</klass>
<tuple>
<none/>
</tuple>
<state>
<tuple>
<float>1612789933.83</float>
<string>UTC</string>
</tuple>
</state>
</object>
</value>
</item>
<item>
<key> <string>validation_state</string> </key>
<value> <string>validated</string> </value>
</item>
</dictionary>
</list>
</value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
import numpy as np
import pandas as pd
out_array_list = out_array
in_data_array = in_array["Data Array"]
progress_indicator = in_array["Progress Indicator"]
in_zbigarray = in_data_array.getArray()
if in_zbigarray is None:
return
if in_zbigarray.shape[0] == 0:
return
# first fill array with lowest resolution
default_data_array = out_array_list[0]["Data Array"]
default_resolution = out_array_list[0]["resolution"]
default_frequency = pd.to_timedelta(default_resolution)
default_zbigarray = default_data_array.getArray()
index = progress_indicator.getIntOffsetIndex()
# convert data to DataFrame
df = pd.DataFrame.from_records(in_zbigarray[index:].copy(), index='date')
# ignore data before start date of output array
if default_zbigarray is not None:
if default_zbigarray.shape[0] != 0:
df = df.loc[str(default_zbigarray[0]['date']):]
if len(df) == 0:
return
# resample
df = df.resample(default_resolution).agg(['min','mean','max']).fillna(0)
# rename columns from tuples like ('x', 'min') to names like 'x_min'
df.columns = ['%s%s' % (a, '_%s' % b if b else '') for a, b in df.columns]
context.log("df.columns = ", df.columns)
# save date vector for later
date_vector = df.index.values.copy()
context.log("date_vector = ", date_vector)
# convert data back to ndarray
default_data = df.to_records(convert_datetime64=False)
# view as structured array
# set date to zero where all values are 0
mask_zero = (df==0).all(axis=1)
default_data['date'][mask_zero] = 0
if default_zbigarray is None:
default_zbigarray = default_data_array.initArray(shape=(0,), dtype=default_data.dtype.fields)
if default_zbigarray.shape[0] == 0:
default_zbigarray.append(default_data)
else:
# calculate start and stop index of new data in output array
default_start_index = int((date_vector[0] - default_zbigarray[0]['date']) / default_frequency)
default_stop_index = int((date_vector[-1] - default_zbigarray[0]['date']) / default_frequency + 1)
# make sure data fits in
if default_stop_index > default_zbigarray.shape[0]:
default_zbigarray.resize((default_stop_index,))
# fill holes in new data with values from old data
old_data = default_zbigarray[default_start_index:default_stop_index]
default_data[mask_zero ] = old_data[mask_zero]
# write new_data to zbigarray
default_zbigarray[default_start_index:default_stop_index] = default_data
# now use data in first resolution array for all other arrays
for out_array in out_array_list[1:]:
out_data_array = out_array["Data Array"]
out_array_resolution = out_array["resolution"]
out_zbigarray = out_data_array.getArray()
if out_zbigarray is None:
out_zbigarray = out_data_array.initArray(shape=(0,), dtype=default_data.dtype.fields)
if out_zbigarray.shape[0] == 0:
start_index = 0
else:
out_array_frequency = pd.to_timedelta(out_array_resolution)
new_stop_date = default_zbigarray[0]['date'] + default_zbigarray.shape[0] * default_frequency
old_stop_date = out_zbigarray[0]['date'] + out_zbigarray.shape[0] * out_array_frequency
start_date = old_stop_date - out_array_frequency
if old_stop_date >= new_stop_date:
continue
# find row index in in_array from where to start resampling
start_index = int(max((start_date - default_zbigarray[0]['date']) / default_frequency, 0))
# if we got data which has been already resampled, then we resample again and overwrite
start_index = min(start_index, default_start_index)
data = default_zbigarray[start_index:].copy()
# convert data to DataFrame and resample
df = pd.DataFrame.from_records(data, index='date')
# set our own date range index so that we can resample and keep 0-dates
resampling_start_date = default_zbigarray[0]['date'] + start_index * default_frequency
df.index = pd.date_range(start=resampling_start_date,
periods=data.shape[0],
freq=default_frequency)
df.index.name = 'date'
# resample each column with appropriate aggregation method
aggregation_dict = {c: c.split('_')[-1] for c in df.columns}
df = df.resample(out_array_resolution).agg(aggregation_dict).fillna(0)
# save date vector for later
date_vector = df.index.values.copy()
# convert data back to ndarray
new_data = df.to_records(convert_datetime64=False)
# set date to zero where all values are 0
new_data['date'][(df==0).all(axis=1)] = 0
if out_zbigarray.shape[0] == 0:
out_zbigarray.append(new_data)
else:
# calculate start and stop index of new data in output array
start_index = int((date_vector[0] - out_zbigarray[0]['date']) / out_array_frequency)
stop_index = int((date_vector[-1] - out_zbigarray[0]['date']) / out_array_frequency + 1)
progress_indicator.setIntOffsetIndex(in_zbigarray.shape[0])
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<!--
  Python Script object with id and reference
  "DataAnalysisLine_resampleRawArrayAll".
  _params declares the script signature: in_array={}, out_array=[].
  _bind_names assigns the names container, context, script and
  traverse_subpath.
  _local_properties declares one extra property, "reference", of type string.
  workflow_history is a persistent reference to AAAAAAAAAAI= (record id="2").
-->
<pickle>
<global name="Python Script" module="erp5.portal_type"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>Script_magic</string> </key>
<value> <int>3</int> </value>
</item>
<item>
<key> <string>_bind_names</string> </key>
<value>
<object>
<klass>
<global name="NameAssignments" module="Shared.DC.Scripts.Bindings"/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key> <string>_asgns</string> </key>
<value>
<dictionary>
<item>
<key> <string>name_container</string> </key>
<value> <string>container</string> </value>
</item>
<item>
<key> <string>name_context</string> </key>
<value> <string>context</string> </value>
</item>
<item>
<key> <string>name_m_self</string> </key>
<value> <string>script</string> </value>
</item>
<item>
<key> <string>name_subpath</string> </key>
<value> <string>traverse_subpath</string> </value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key> <string>_local_properties</string> </key>
<value>
<tuple>
<dictionary>
<item>
<key> <string>id</string> </key>
<value> <string>reference</string> </value>
</item>
<item>
<key> <string>type</string> </key>
<value> <string>string</string> </value>
</item>
</dictionary>
</tuple>
</value>
</item>
<item>
<key> <string>_params</string> </key>
<value> <string>in_array={}, out_array=[]</string> </value>
</item>
<item>
<key> <string>description</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>DataAnalysisLine_resampleRawArrayAll</string> </value>
</item>
<item>
<key> <string>portal_type</string> </key>
<value> <string>Python Script</string> </value>
</item>
<item>
<key> <string>reference</string> </key>
<value> <string>DataAnalysisLine_resampleRawArrayAll</string> </value>
</item>
<item>
<key> <string>workflow_history</string> </key>
<value>
<persistent> <string encoding="base64">AAAAAAAAAAI=</string> </persistent>
</value>
</item>
</dictionary>
</pickle>
</record>
<record id="2" aka="AAAAAAAAAAI=">
<!--
  PersistentMapping whose "data" dictionary has a single entry:
  edit_workflow refers to AAAAAAAAAAM= (record id="3").
-->
<pickle>
<global name="PersistentMapping" module="Persistence.mapping"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>data</string> </key>
<value>
<dictionary>
<item>
<key> <string>edit_workflow</string> </key>
<value>
<persistent> <string encoding="base64">AAAAAAAAAAM=</string> </persistent>
</value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</pickle>
</record>
<record id="3" aka="AAAAAAAAAAM=">
<!--
  WorkflowHistoryList with a single _log entry: action "edit" by actor
  "zope", state "current", no comment, empty error_message.
  The time is a DateTime whose state tuple is (1612790667.9, "UTC").
-->
<pickle>
<global name="WorkflowHistoryList" module="Products.ERP5Type.Workflow"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_log</string> </key>
<value>
<list>
<dictionary>
<item>
<key> <string>action</string> </key>
<value> <string>edit</string> </value>
</item>
<item>
<key> <string>actor</string> </key>
<value> <string>zope</string> </value>
</item>
<item>
<key> <string>comment</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>error_message</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>serial</string> </key>
<value> <string>989.57689.51948.56439</string> </value>
</item>
<item>
<key> <string>state</string> </key>
<value> <string>current</string> </value>
</item>
<item>
<key> <string>time</string> </key>
<value>
<object>
<klass>
<global name="DateTime" module="DateTime.DateTime"/>
</klass>
<tuple>
<none/>
</tuple>
<state>
<tuple>
<float>1612790667.9</float>
<string>UTC</string>
</tuple>
</state>
</object>
</value>
</item>
</dictionary>
</list>
</value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
...@@ -12,6 +12,7 @@ data_ingestion_module/wendelin_date-sensor_1 ...@@ -12,6 +12,7 @@ data_ingestion_module/wendelin_date-sensor_1
data_ingestion_module/wendelin_date-sensor_1/** data_ingestion_module/wendelin_date-sensor_1/**
data_operation_module/wendelin_convert_environment_raw_data data_operation_module/wendelin_convert_environment_raw_data
data_operation_module/wendelin_ingest_data data_operation_module/wendelin_ingest_data
data_operation_module/resample_raw_array_all
data_product_module/wendelin_environment_raw_array data_product_module/wendelin_environment_raw_array
data_product_module/wendelin_environment_raw_array/** data_product_module/wendelin_environment_raw_array/**
data_product_module/wendelin_generic_interval_resampled_array data_product_module/wendelin_generic_interval_resampled_array
...@@ -28,6 +29,7 @@ organisation_module/wendelin_nexedi_data_center ...@@ -28,6 +29,7 @@ organisation_module/wendelin_nexedi_data_center
organisation_module/wendelin_sample_company organisation_module/wendelin_sample_company
organisation_module/wendelin_sample_factory organisation_module/wendelin_sample_factory
portal_callables/DataAnalysisLine_convertEnvironmentDataStreamToArray portal_callables/DataAnalysisLine_convertEnvironmentDataStreamToArray
portal_callables/DataAnalysisLine_resampleRawArrayAll
progress_indicator_module/wendelin_convert_environment_raw_data progress_indicator_module/wendelin_convert_environment_raw_data
progress_indicator_module/wendelin_resample_generic_interval_array progress_indicator_module/wendelin_resample_generic_interval_array
web_page_module/rjs_ndarray_bundle_js web_page_module/rjs_ndarray_bundle_js
\ No newline at end of file
...@@ -12,6 +12,7 @@ data_ingestion_module/wendelin_date-sensor_1 ...@@ -12,6 +12,7 @@ data_ingestion_module/wendelin_date-sensor_1
data_ingestion_module/wendelin_date-sensor_1/** data_ingestion_module/wendelin_date-sensor_1/**
data_operation_module/wendelin_convert_environment_raw_data data_operation_module/wendelin_convert_environment_raw_data
data_operation_module/wendelin_ingest_data data_operation_module/wendelin_ingest_data
data_operation_module/resample_raw_array_all
data_product_module/wendelin_environment_raw_array data_product_module/wendelin_environment_raw_array
data_product_module/wendelin_environment_raw_array/** data_product_module/wendelin_environment_raw_array/**
data_product_module/wendelin_generic_interval_resampled_array data_product_module/wendelin_generic_interval_resampled_array
...@@ -28,6 +29,7 @@ organisation_module/wendelin_nexedi_data_center ...@@ -28,6 +29,7 @@ organisation_module/wendelin_nexedi_data_center
organisation_module/wendelin_sample_company organisation_module/wendelin_sample_company
organisation_module/wendelin_sample_factory organisation_module/wendelin_sample_factory
portal_callables/DataAnalysisLine_convertEnvironmentDataStreamToArray portal_callables/DataAnalysisLine_convertEnvironmentDataStreamToArray
portal_callables/DataAnalysisLine_resampleRawArrayAll
progress_indicator_module/wendelin_convert_environment_raw_data progress_indicator_module/wendelin_convert_environment_raw_data
progress_indicator_module/wendelin_resample_generic_interval_array progress_indicator_module/wendelin_resample_generic_interval_array
web_page_module/rjs_ndarray_bundle_js web_page_module/rjs_ndarray_bundle_js
\ No newline at end of file
...@@ -12,6 +12,7 @@ data_ingestion_module/wendelin_date-sensor_1 ...@@ -12,6 +12,7 @@ data_ingestion_module/wendelin_date-sensor_1
data_ingestion_module/wendelin_date-sensor_1/** data_ingestion_module/wendelin_date-sensor_1/**
data_operation_module/wendelin_convert_environment_raw_data data_operation_module/wendelin_convert_environment_raw_data
data_operation_module/wendelin_ingest_data data_operation_module/wendelin_ingest_data
data_operation_module/resample_raw_array_all
data_product_module/wendelin_environment_raw_array data_product_module/wendelin_environment_raw_array
data_product_module/wendelin_environment_raw_array/** data_product_module/wendelin_environment_raw_array/**
data_product_module/wendelin_generic_interval_resampled_array data_product_module/wendelin_generic_interval_resampled_array
...@@ -28,6 +29,7 @@ organisation_module/wendelin_nexedi_data_center ...@@ -28,6 +29,7 @@ organisation_module/wendelin_nexedi_data_center
organisation_module/wendelin_sample_company organisation_module/wendelin_sample_company
organisation_module/wendelin_sample_factory organisation_module/wendelin_sample_factory
portal_callables/DataAnalysisLine_convertEnvironmentDataStreamToArray portal_callables/DataAnalysisLine_convertEnvironmentDataStreamToArray
portal_callables/DataAnalysisLine_resampleRawArrayAll
progress_indicator_module/wendelin_convert_environment_raw_data progress_indicator_module/wendelin_convert_environment_raw_data
progress_indicator_module/wendelin_resample_generic_interval_array progress_indicator_module/wendelin_resample_generic_interval_array
web_page_module/rjs_ndarray_bundle_js web_page_module/rjs_ndarray_bundle_js
\ No newline at end of file
...@@ -10,6 +10,7 @@ data_array_module/wendelin_resample_generic_interval_array_30000 ...@@ -10,6 +10,7 @@ data_array_module/wendelin_resample_generic_interval_array_30000
data_array_module/wendelin_resample_generic_interval_array_60 data_array_module/wendelin_resample_generic_interval_array_60
data_ingestion_module/wendelin_date-sensor_1 data_ingestion_module/wendelin_date-sensor_1
data_ingestion_module/wendelin_date-sensor_1/** data_ingestion_module/wendelin_date-sensor_1/**
data_operation_module/resample_raw_array_all
data_operation_module/wendelin_convert_environment_raw_data data_operation_module/wendelin_convert_environment_raw_data
data_operation_module/wendelin_ingest_data data_operation_module/wendelin_ingest_data
data_product_module/wendelin_environment_raw_array data_product_module/wendelin_environment_raw_array
...@@ -28,6 +29,7 @@ organisation_module/wendelin_nexedi_data_center ...@@ -28,6 +29,7 @@ organisation_module/wendelin_nexedi_data_center
organisation_module/wendelin_sample_company organisation_module/wendelin_sample_company
organisation_module/wendelin_sample_factory organisation_module/wendelin_sample_factory
portal_callables/DataAnalysisLine_convertEnvironmentDataStreamToArray portal_callables/DataAnalysisLine_convertEnvironmentDataStreamToArray
portal_callables/DataAnalysisLine_resampleRawArrayAll
progress_indicator_module/wendelin_convert_environment_raw_data progress_indicator_module/wendelin_convert_environment_raw_data
progress_indicator_module/wendelin_resample_generic_interval_array progress_indicator_module/wendelin_resample_generic_interval_array
web_page_module/rjs_ndarray_bundle_js web_page_module/rjs_ndarray_bundle_js
\ No newline at end of file
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment