Commit ec299ba6 authored by Ivan Tyagov

A simple example of a map-reduce job that stores intermediate results in an...

A simple example of a map-reduce job that stores intermediate results in an Active Process and calculates the average over slices of a ZBig Array.
parent d7693e87
...@@ -207,13 +207,40 @@ context.activate().DataStream_readChunkListAndTransform( \ ...@@ -207,13 +207,40 @@ context.activate().DataStream_readChunkListAndTransform( \
# test that extracted array contains same values as input CSV # test that extracted array contains same values as input CSV
zarray = data_array.getArray() zarray = data_array.getArray()
self.assertEqual(np.average(zarray), np.average(np.arange(10001, 200001))) expected_numpy_array = np.arange(10001, 200001)
self.assertTrue(np.array_equal(zarray, np.arange(10001, 200001))) self.assertEqual(np.average(zarray), np.average(expected_numpy_array))
self.assertTrue(np.array_equal(zarray, expected_numpy_array))
# clean up script # clean up script
portal.portal_skins.custom.manage_delObjects([script_id,]) portal.portal_skins.custom.manage_delObjects([script_id,])
self.tic() self.tic()
# analyze numpy array using activities.
active_process = portal.portal_activities.newActiveProcess()
zarray = data_array.getArray()
max_elements = zarray.shape[0]
expected_result_list = []
jobs = 15
offset = max_elements / jobs
start = 0
end = start + offset
for i in range(jobs):
# calculate directly expectations
expected_result_list.append(np.average(expected_numpy_array[start:end]))
data_array.activate(
active_process = active_process.getPath(), \
activity='SQLQueue').DataArray_calculateArraySliceAverageAndStore(start, end)
data_array.log('%s %s' %(start, end))
start += offset
end += offset
self.tic()
result_list = [x.getResult() for x in active_process.getResultList()]
self.assertSameSet(result_list, expected_result_list)
# final reduce job to a number
sum(result_list)
def test_02_Examples(self): def test_02_Examples(self):
""" """
Test we can use python scientific libraries by using directly created Test we can use python scientific libraries by using directly created
......
...@@ -51,8 +51,9 @@ ...@@ -51,8 +51,9 @@
<string>W:118, 4: Unused variable \'pandas\' (unused-variable)</string> <string>W:118, 4: Unused variable \'pandas\' (unused-variable)</string>
<string>W:117, 4: Unused variable \'sklearn\' (unused-variable)</string> <string>W:117, 4: Unused variable \'sklearn\' (unused-variable)</string>
<string>W:136, 22: Unused variable \'data_supply\' (unused-variable)</string> <string>W:136, 22: Unused variable \'data_supply\' (unused-variable)</string>
<string>W:176, 4: Unused variable \'ingestion_policy\' (unused-variable)</string>
<string>W:176, 22: Unused variable \'data_supply\' (unused-variable)</string> <string>W:176, 22: Unused variable \'data_supply\' (unused-variable)</string>
<string>W:227, 8: Unused variable \'i\' (unused-variable)</string>
<string>W:176, 4: Unused variable \'ingestion_policy\' (unused-variable)</string>
</tuple> </tuple>
</value> </value>
</item> </item>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment