From 2071ee261227ff6b77f36098e953977b2fdf94b1 Mon Sep 17 00:00:00 2001 From: Vincent Pelletier <vincent@nexedi.com> Date: Wed, 30 Jan 2008 13:32:26 +0000 Subject: [PATCH] Replace sumCatalogResultByWorklist (was O(distinct_criterion_value_count ** 2)) with an O(SQL_result_line_count * worklists_count) implementation. Speed decrease is expected on "small" configurations (because of high SQL_result_line_count when not using SQL-cached worklists). Speed increase was measured when using SQL-cached worklists even with many worklists (40+). If speed decrease is too critical, it should be possible to keep both implementations and implement a short heuristic to choose between both. git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@18923 20353a03-c40f-0410-a6d1-a30d3c3de9de --- product/ERP5Type/patches/WorkflowTool.py | 113 +++++++---------------- 1 file changed, 31 insertions(+), 82 deletions(-) diff --git a/product/ERP5Type/patches/WorkflowTool.py b/product/ERP5Type/patches/WorkflowTool.py index 060e4e6c74..df8fb46c86 100644 --- a/product/ERP5Type/patches/WorkflowTool.py +++ b/product/ERP5Type/patches/WorkflowTool.py @@ -32,6 +32,7 @@ from sets import ImmutableSet from Acquisition import aq_base from Persistence import Persistent from Globals import PersistentMapping +from itertools import izip def DCWorkflowDefinition_notifyWorkflowMethod(self, ob, transition_list, args=None, kw=None): ''' @@ -325,43 +326,6 @@ def getWorklistListQuery(grouped_worklist_dict): assert COUNT_COLUMN_TITLE not in total_criterion_id_dict return (total_criterion_id_list, query) -def _ensemblistMultiply(ensemble_a, ensemble_b): - """ - Do the ensemblist multiplication on ensemble_a and ensemble_b. - Ensembles must be lists of tuples. - Returns a list of tuples. - Order is preserved. - """ - result = [] - for a in ensemble_a: - for b in ensemble_b: - result.append(a + b) - return result - -def ensemblistMultiply(ensemble_list): - """ - Return a list of tuple generated from the ensemblist multiplication of - given ensemble list. - Order is preserved: - - Ensemble N will always appear on the Nth position of output tuples. - - Nth entry of input list will always appear after N-1th and before N+1th. - Any number of ensemble can be provided in the parameter list. - - Example: - Input: - [['a', 'b', 'c'], [0, 1]] - Output: - [('a', 0), ('a', 1), ('b', 0), ('b', 1), ('c', 0), ('c', 1)] - """ - ensemble_list_len = len(ensemble_list) - if ensemble_list_len == 0: - return [] - result = [(x, ) for x in ensemble_list[0]] - for ensemble_position in xrange(1, len(ensemble_list)): - ensemble_b = [(x, ) for x in ensemble_list[ensemble_position]] - result = _ensemblistMultiply(result, ensemble_b) - return result - def sumCatalogResultByWorklist(grouped_worklist_dict, catalog_result): """ Return a dict regrouping each worklist's result, extracting it from @@ -373,56 +337,41 @@ def sumCatalogResultByWorklist(grouped_worklist_dict, catalog_result): It is better to avoid reading multiple times the catalog result from flexibility point of view: if it must ever be changed into a cursor, this code will keep working nicely without needing to rewind the cursor. + + This code assumes that all worklists have the same set of criterion ids, + and that when a criterion id is associated with an ExclusionList it is + also true for all worklists. """ worklist_result_dict = {} if len(catalog_result) > 0: - # List all unique criterions in criterion_id_list - criterion_id_dict = {} - for worklist in grouped_worklist_dict.itervalues(): - for criterion_id, criterion_value in worklist.iteritems(): - if not isinstance(criterion_value, ExclusionList): - criterion_id_dict[criterion_id] = None - criterion_id_list = criterion_id_dict.keys() - class_dict = dict([(name, value.__class__) for name, value in \ - zip(catalog_result.names(), catalog_result[0])]) - # Group all worklists concerned by a set of criterion values in - # criterion_value_to_worklist_key_dict - # key: criterion value tuple, in the same order as in criterion_id_list - # value: list of ids of every concerned worklist - criterion_value_to_worklist_key_dict = {} - for worklist_id, criterion_dict in grouped_worklist_dict.iteritems(): - # Transtype values to match catalog-provided type. - for criterion_id in criterion_id_list: - criterion_value_list = criterion_dict[criterion_id] - expected_class = class_dict[criterion_id] - if not isinstance(criterion_value_list[0], expected_class): - criterion_dict[criterion_id] = [expected_class(x) for x in - criterion_value_list] - # Get all the possible combinations of values for all criterions for this - # worklist. Worklist filtering on portal_type='Foo' and - # validation_state in ['draft', 'validated'] is "interested" by both - # ('Foo', 'draft') and ('Foo', 'validated'). This generates both tuples - # when given initial filter. - criterion_value_key_list = ensemblistMultiply([criterion_dict[x] for x in \ - criterion_id_list]) - for criterion_value_key in criterion_value_key_list: - if criterion_value_key not in criterion_value_to_worklist_key_dict: - criterion_value_to_worklist_key_dict[criterion_value_key] = [] - criterion_value_to_worklist_key_dict[criterion_value_key].append( - worklist_id) + # Transtype all worklist definitions where needed + criterion_id_list = [] + class_dict = dict(((name, value.__class__) for name, value in \ + izip(catalog_result.names(), catalog_result[0]))) + for criterion_dict in grouped_worklist_dict.itervalues(): + for criterion_id, criterion_value_list in criterion_dict.iteritems(): + if type(criterion_value_list) is not ExclusionList: + criterion_id_list.append(criterion_id) + expected_class = class_dict[criterion_id] + if type(criterion_value_list[0]) is not expected_class: + criterion_dict[criterion_id] = ImmutableSet([expected_class(x) for x in criterion_value_list]) + elif type(criterion_value_list) is not ImmutableSet: + criterion_dict[criterion_id] = ImmutableSet(criterion_dict[criterion_id]) # Read catalog result and distribute to matching worklists for result_line in catalog_result: - criterion_value_key = tuple([result_line[x] for x in criterion_id_list]) - if criterion_value_key not in criterion_value_to_worklist_key_dict: - LOG('WorkflowTool_listActions', WARNING, - 'No worklist can be found for result combination %s' % \ - (repr(criterion_value_key), )) - continue - for worklist_id in \ - criterion_value_to_worklist_key_dict[criterion_value_key]: - count = worklist_result_dict.get(worklist_id, 0) - worklist_result_dict[worklist_id] = count + \ - int(result_line[COUNT_COLUMN_TITLE]) + result_count = int(result_line[COUNT_COLUMN_TITLE]) + for worklist_id, criterion_dict in grouped_worklist_dict.iteritems(): + is_candidate = True + for criterion_id in criterion_id_list: + criterion_value_set = criterion_dict[criterion_id] + if result_line[criterion_id] not in criterion_value_set: + is_candidate = False + break + if is_candidate: + try: + worklist_result_dict[worklist_id] += result_count + except KeyError: + worklist_result_dict[worklist_id] = result_count return worklist_result_dict def generateActionList(worklist_metadata, worklist_result, portal_url): -- 2.30.9