Commit f0e1af12 authored by Hardik Juneja's avatar Hardik Juneja

CMFActivity: Remove remaining transactions.commit and move BTree to ActiveProcess

parent 2a3e619a
......@@ -32,6 +32,7 @@ from Products.CMFCore import permissions as CMFCorePermissions
from Products.ERP5Type.Base import Base
from Products.ERP5Type import PropertySheet
from Products.ERP5Type.ConflictFree import ConflictFreeLog
from BTrees.LOBTree import LOBTree
from BTrees.Length import Length
from random import randrange
from .ActiveResult import ActiveResult
......@@ -85,6 +86,15 @@ class ActiveProcess(Base):
def __init__(self, *args, **kw):
Base.__init__(self, *args, **kw)
self.result_list = ConflictFreeLog()
self.use_btree = False
security.declareProtected(CMFCorePermissions.ManagePortal, 'useBTree')
def useBTree(self):
  """Switch result storage from the linked list to an LOBTree.

  Used by the joblib backend, which stores each result under its
  integer signature as the key.
  NOTE(review): any results already held in self.result_list are
  discarded by this switch -- confirm callers only invoke it on a
  fresh ActiveProcess.
  """
  self.use_btree = True
  self.result_list = LOBTree()
security.declareProtected(CMFCorePermissions.ManagePortal, 'postResult')
def postResult(self, result):
......@@ -92,8 +102,19 @@ class ActiveProcess(Base):
result_list = self.result_list
except AttributeError:
# BBB: self was created before implementation of __init__
self.result_list = result_list = ConflictFreeLog()
if self.use_btree:
self.result_list = result_list = LOBTree()
else:
self.result_list = result_list = ConflictFreeLog()
else:
if self.use_btree:
if not hasattr(result, 'sig'):
result_id = randrange(0, 10000 * (id(result) + 1))
else:
result_id = result.sig
result_list.insert(result_id, result)
return
if type(result_list) is not ConflictFreeLog: # BBB: result_list is IOBTree
# use a random id in order to store result in a way with
# fewer conflict errors
......@@ -103,7 +124,12 @@ class ActiveProcess(Base):
result_list[random_id] = result
self.result_len.change(1)
return
result_list.append(result)
if self.use_btree:
signature = int(result.sig, 16)
result_list.insert(signature, result)
else:
result_list.append(result)
security.declareProtected(CMFCorePermissions.ManagePortal, 'postActiveResult')
def postActiveResult(self, *args, **kw):
......@@ -124,6 +150,18 @@ class ActiveProcess(Base):
return result_list.values()
return list(result_list)
security.declareProtected(CMFCorePermissions.ManagePortal, 'getResult')
def getResult(self, key, **kw):
  """Return the result stored under ``key``, or None if it is absent.

  Works with both storages: LOBTree / dict-like (key lookup raising
  KeyError) and ConflictFreeLog (index lookup raising IndexError).
  ``kw`` is accepted for interface compatibility and ignored.
  """
  try:
    result_list = self.result_list
    return result_list[key]
  except (AttributeError, KeyError, IndexError, TypeError):
    # AttributeError: self has no result_list yet (BBB);
    # KeyError/IndexError: key not present; TypeError: the current
    # storage does not support this key type.  A bare ``except:`` here
    # previously swallowed everything, including ZODB ConflictError,
    # which must propagate so the transaction can be retried.
    return None
security.declareProtected(CMFCorePermissions.ManagePortal, 'activateResult')
def activateResult(self, result):
if result not in (None, 0, '', (), []):
......
......@@ -139,7 +139,6 @@ class SQLBase(Queue):
serialization_tag_list = [m.activity_kw.get('serialization_tag', '')
for m in message_list]
processing_node_list = []
for m in message_list:
m.order_validation_text = x = self.getOrderValidationText(m)
processing_node_list.append(0 if x == 'none' else -1)
......
......@@ -26,24 +26,11 @@
#
##############################################################################
# XXX: Note from Rafael
# Only reimplement the minimal parts, and only customize the SQL that updates this table.
# Always check if things are there (i.e.: if the connection or the script is present).
import copy
import hashlib
import sys
import transaction
from functools import total_ordering
from zLOG import LOG, TRACE, INFO, WARNING, ERROR, PANIC
from zExceptions import ExceptionFormatter
from ZODB.POSException import ConflictError
from SQLBase import SQLBase, sort_message_key
from Products.CMFActivity.ActivityTool import Message
from Products.CMFActivity.ActivityTool import (
Message, MESSAGE_NOT_EXECUTED, MESSAGE_EXECUTED, SkippedMessage)
from Products.CMFActivity.ActivityRuntimeEnvironment import (
DEFAULT_MAX_RETRY, ActivityRuntimeEnvironment, getTransactionalVariable)
from Queue import Queue, VALIDATION_ERROR_DELAY, VALID, INVALID_PATH
# Stop validating more messages when this limit is reached
......@@ -55,46 +42,6 @@ _DequeueMessageException = Exception()
from SQLDict import SQLDict
# This is an improvisation of
# http://stackoverflow.com/questions/5884066/hashing-a-python-dictionary/8714242#8714242
def make_hash(o):
  """Return a deterministic hash for arbitrarily nested containers.

  Supports dicts, lists, tuples and sets nested to any level, provided
  the leaves are hashable or digestible by hashlib.md5.
  """
  if isinstance(o, (set, frozenset)):
    # frozenset hashing is order-independent, so two equal sets hash
    # identically regardless of iteration order (the previous version
    # hashed a tuple of the set's iteration order, which is fragile).
    return hash(frozenset(make_hash(e) for e in o))
  if isinstance(o, (tuple, list)):
    # Hash element-wise so unhashable members are handled recursively.
    return hash(tuple(make_hash(e) for e in o))
  if isinstance(o, dict):
    # frozenset of (key, hashed value) pairs is insertion-order
    # independent and avoids the deepcopy + sort of the old version.
    return hash(frozenset((k, make_hash(v)) for k, v in o.items()))
  try:
    return hash(o)
  except TypeError:
    # Unhashable leaf (e.g. a bytearray): fall back to an md5 digest.
    return hash(int(hashlib.md5(o).hexdigest(), 16))
@total_ordering
class MyBatchedSignature(object):
  """Hashable, totally ordered signature of a joblib batch.

  Identity is (function name, positional args).  kwargs are retained
  for inspection but deliberately excluded from comparison and hashing
  so that __hash__ stays consistent with __eq__.
  """

  def __init__(self, batch):
    # batch.items is a list of (callable, args, kwargs) tuples; the
    # first call of the batch defines the signature.
    func, args, kwargs = batch.items[0]
    self.func = func.__name__
    self.args = args
    self.kwargs = kwargs

  def _key(self):
    # Comparison/hash key shared by __eq__, __lt__ and __hash__.
    return (self.func, self.args)

  def __eq__(self, other):
    return self._key() == other._key()

  def __lt__(self, other):
    return self._key() < other._key()

  def __hash__(self):
    # BUGFIX: defining __eq__ without __hash__ left the default
    # identity hash (Python 2) or made instances unhashable (Python 3),
    # so equal signatures did not collide as mapping keys even though
    # the class is documented as a "hashable signature".  repr() makes
    # potentially unhashable args (lists, dicts) usable for hashing.
    return hash((self.func, repr(self.args)))
class SQLJoblib(SQLDict):
"""
XXX SQLJoblib
......@@ -124,7 +71,7 @@ class SQLJoblib(SQLDict):
def register(self, activity_buffer, activity_tool, message):
"""
Send message to mysql directly
Send messages to mysql directly
"""
assert not message.is_registered, message
message.is_registered = True
......@@ -137,7 +84,6 @@ class SQLJoblib(SQLDict):
if m.is_registered:
uid = portal.portal_ids.generateNewIdList(self.uid_group,
id_count=1, id_generator='uid')[0]
#import pdb; pdb.set_trace()
m.order_validation_text = x = self.getOrderValidationText(m)
processing_node = (0 if x == 'none' else -1)
portal.SQLJoblib_writeMessage(
......@@ -151,82 +97,122 @@ class SQLJoblib(SQLDict):
group_method_id=m.getGroupId(),
date=m.activity_kw.get('at_date'),
tag=m.activity_kw.get('tag', ''),
signature=m.activity_kw.get('signature', ''),
processing_node=processing_node,
serialization_tag=m.activity_kw.get('serialization_tag', ''))
# Queue semantic
def dequeueMessage(self, activity_tool, processing_node):
message_list, group_method_id, uid_to_duplicate_uid_list_dict = \
self.getProcessableMessageList(activity_tool, processing_node)
if message_list:
# Remove group_id parameter from group_method_id
if group_method_id is not None:
group_method_id = group_method_id.split('\0')[0]
if group_method_id not in (None, ""):
method = activity_tool.invokeGroup
args = (group_method_id, message_list, self.__class__.__name__,
hasattr(self, 'generateMessageUID'))
activity_runtime_environment = ActivityRuntimeEnvironment(None)
else:
method = activity_tool.invoke
message = message_list[0]
args = (message, )
activity_runtime_environment = ActivityRuntimeEnvironment(message)
# Commit right before executing messages.
# As MySQL transaction does not start exactly at the same time as ZODB
# transactions but a bit later, messages available might be called
# on objects which are not available - or available in an old
# version - to ZODB connector.
# So all connectors must be committed now that we have selected
# everything needed from MySQL to get a fresh view of ZODB objects.
transaction.commit()
transaction.begin()
tv = getTransactionalVariable()
tv['activity_runtime_environment'] = activity_runtime_environment
# Try to invoke
try:
method(*args)
# Abort if at least 1 message failed. On next tic, only those that
# succeeded will be selected because their at_date won't have been
# increased.
for m in message_list:
if m.getExecutionState() == MESSAGE_NOT_EXECUTED:
raise _DequeueMessageException
def getProcessableMessageLoader(self, activity_tool, processing_node):
  """Return a ``load(line)`` callable that deduplicates joblib messages.

  For each SQL line, ``load`` returns ``(message, uid, duplicate_uids)``;
  when the (path, method_id) pair was already loaded in this batch it
  returns ``(None, original_uid, [uid])`` so the caller merges the line
  into the earlier message's duplicate list.  Duplicates found in the
  table are reserved for *processing_node* as a side effect.
  """
  # Maps (path, method_id) -> uid of the first message loaded for it
  # within this call; lives only for the lifetime of the loader.
  path_and_method_id_dict = {}
  def load(line):
    # getProcessableMessageList already fetch messages with the same
    # group_method_id, so what remains to be filtered on are path, method_id
    # and signature
    path = line.path
    method_id = line.method_id
    key = path, method_id
    uid = line.uid
    signature = line.signature
    original_uid = path_and_method_id_dict.get(key)
    if original_uid is None:
      m = Message.load(line.message, uid=uid, line=line, signature=signature)
      try:
        # Find other pending lines for the same (path, method_id,
        # group_method_id, signature) and reserve them for this node so
        # no other node picks them up.
        result = activity_tool.SQLJoblib_selectDuplicatedLineList(
          path=path,
          method_id=method_id,
          group_method_id=line.group_method_id,
          signature=signature)
        reserve_uid_list = uid_list = [x.uid for x in result]
        if reserve_uid_list:
          activity_tool.SQLJoblib_reserveDuplicatedLineList(
            processing_node=processing_node, uid=reserve_uid_list)
      except:
        # Logged then re-raised, so database/transaction errors still
        # abort the surrounding processing.
        self._log(WARNING, 'getDuplicateMessageUidList got an exception')
        raise
      if uid_list:
        self._log(TRACE, 'Reserved duplicate messages: %r' % uid_list)
      path_and_method_id_dict[key] = uid
      return m, uid, uid_list
    # We know that original_uid != uid because caller skips lines we returned
    # earlier.
    return None, original_uid, [uid]
  return load
def generateMessageUID(self, m):
  """Return the tuple that identifies message *m* for deduplication."""
  activity_kw = m.activity_kw
  return (
    tuple(m.object_path),
    m.method_id,
    activity_kw.get('signature'),
    activity_kw.get('tag'),
    activity_kw.get('group_id'),
  )
def distribute(self, activity_tool, node_count):
offset = 0
assignMessage = getattr(activity_tool, 'SQLBase_assignMessage', None)
if assignMessage is not None:
now_date = self.getNow(activity_tool)
validated_count = 0
while 1:
result = self._getMessageList(activity_tool, processing_node=-1,
to_date=now_date,
offset=offset, count=READ_MESSAGE_LIMIT)
if not result:
return
transaction.commit()
for m in message_list:
if m.getExecutionState() == MESSAGE_EXECUTED:
transaction.begin()
# Create a signature and then store result into the dict
signature = MyBatchedSignature(m.args[0].batch)
# get active process
active_process = activity_tool.unrestrictedTraverse(m.active_process)
active_process.process_result_map.update({signature: m.result})
transaction.commit()
except:
exc_info = sys.exc_info()
if exc_info[1] is not _DequeueMessageException:
self._log(WARNING,
'Exception raised when invoking messages (uid, path, method_id) %r'
% [(m.uid, m.object_path, m.method_id) for m in message_list])
for m in message_list:
m.setExecutionState(MESSAGE_NOT_EXECUTED, exc_info, log=False)
self._abort()
exc_info = message_list[0].exc_info
if exc_info:
try:
# Register it again.
tv['activity_runtime_environment'] = activity_runtime_environment
cancel = message.on_error_callback(*exc_info)
del exc_info, message.exc_info
transaction.commit()
if cancel:
message.setExecutionState(MESSAGE_EXECUTED)
except:
self._log(WARNING, 'Exception raised when processing error callbacks')
message.setExecutionState(MESSAGE_NOT_EXECUTED)
self._abort()
self.finalizeMessageExecution(activity_tool, message_list,
uid_to_duplicate_uid_list_dict)
transaction.commit()
return not message_list
\ No newline at end of file
validation_text_dict = {'none': 1}
message_dict = {}
for line in result:
message = Message.load(line.message, uid=line.uid, line=line)
if not hasattr(message, 'order_validation_text'): # BBB
message.order_validation_text = self.getOrderValidationText(message)
self.getExecutableMessageList(activity_tool, message, message_dict,
validation_text_dict, now_date=now_date)
if message_dict:
message_unique_dict = {}
serialization_tag_dict = {}
distributable_uid_set = set()
deletable_uid_list = []
# remove duplicates
# SQLDict considers object_path, method_id, tag to unify activities,
# but ignores method arguments. They are outside of semantics.
for message in message_dict.itervalues():
message_unique_dict.setdefault(self.generateMessageUID(message),
[]).append(message)
for message_list in message_unique_dict.itervalues():
if len(message_list) > 1:
# Sort list of duplicates to keep the message with highest score
message_list.sort(key=sort_message_key)
deletable_uid_list += [m.uid for m in message_list[1:]]
message = message_list[0]
serialization_tag = message.activity_kw.get('serialization_tag')
if serialization_tag is None:
distributable_uid_set.add(message.uid)
else:
serialization_tag_dict.setdefault(serialization_tag,
[]).append(message)
# Don't let through if there is the same serialization tag in the
# message dict. If there is the same serialization tag, only one can
# be validated and others must wait.
# But messages with group_method_id are exceptions. serialization_tag
# does not stop validating together. Because those messages should
# be processed together at once.
for message_list in serialization_tag_dict.itervalues():
# Sort list of messages to validate the message with highest score
message_list.sort(key=sort_message_key)
distributable_uid_set.add(message_list[0].uid)
group_method_id = message_list[0].line.group_method_id
if group_method_id == '\0':
continue
for message in message_list[1:]:
if group_method_id == message.line.group_method_id:
distributable_uid_set.add(message.uid)
if deletable_uid_list:
activity_tool.SQLBase_delMessage(table=self.sql_table,
uid=deletable_uid_list)
distributable_count = len(distributable_uid_set)
if distributable_count:
assignMessage(table=self.sql_table,
processing_node=0, uid=tuple(distributable_uid_set))
validated_count += distributable_count
if validated_count >= MAX_VALIDATED_LIMIT:
return
offset += READ_MESSAGE_LIMIT
......@@ -26,16 +26,17 @@
##############################################################################
ENABLE_JOBLIB = True
import copy
import hashlib
import sys
import time
import transaction
from BTrees.OOBTree import OOBTree
from zLOG import LOG, INFO, WARNING
from ZODB.POSException import ConflictError
try:
from sklearn.externals.joblib import register_parallel_backend
from sklearn.externals.joblib.hashing import hash as joblib_hash
from sklearn.externals.joblib.parallel import ParallelBackendBase, parallel_backend
from sklearn.externals.joblib.parallel import FallbackToBackend, SequentialBackend
from sklearn.externals.joblib._parallel_backends import SafeFunction
......@@ -45,13 +46,11 @@ except ImportError:
LOG("CMFActivityBackend", WARNING, "CLASS NOT LOADED!!!")
ENABLE_JOBLIB = False
from Activity.SQLJoblib import MyBatchedSignature
if ENABLE_JOBLIB:
class MySafeFunction(SafeFunction):
"""Wrapper around a SafeFunction that catches any exception
The exception can be handled in CMFActivityResult.get
The exception can be handled in CMFActivityResult.get
"""
def __init__(self, *args, **kwargs):
super(MySafeFunction, self).__init__(*args, **kwargs)
......@@ -67,23 +66,12 @@ if ENABLE_JOBLIB:
self.active_process = active_process
self.active_process_sig = active_process_sig
self.callback = callback
def get(self, timeout=None):
'''
while not self.active_process.getResultList():
time.sleep(1)
if timeout is not None:
timeout -= 1
if timeout < 0:
raise RuntimeError('Timeout reached')
transaction.commit()
'''
if self.active_process.process_result_map[self.active_process_sig] is None:
if self.active_process.getResult(self.active_process_sig) is None:
raise ConflictError
result = self.active_process.process_result_map[self.active_process_sig]
result = self.active_process.getResult(self.active_process_sig).result
# TODO raise before or after the callback?
if isinstance(result, Exception):
raise result
if self.callback is not None:
......@@ -94,10 +82,7 @@ if ENABLE_JOBLIB:
def __init__(self, *args, **kwargs):
self.count = 1
self.active_process = kwargs['active_process']
if not hasattr(self.active_process, 'process_result_map'):
self.active_process.process_result_map = OOBTree()
transaction.commit()
def effective_n_jobs(self, n_jobs):
"""Dummy implementation to prevent n_jobs <=0
......@@ -113,14 +98,16 @@ if ENABLE_JOBLIB:
active_process_id = self.active_process.getId()
joblib_result = None
sig = MyBatchedSignature(batch)
if not self.active_process.process_result_map.has_key(sig):
self.active_process.process_result_map.insert(sig, None)
# create a signature and convert it to integer
sig = joblib_hash(batch.items[0])
sigint = int(sig, 16) % (10 ** 16)
if not self.active_process.getResult(sigint):
joblib_result = portal_activities.activate(activity='SQLJoblib',
tag="joblib_%s" % active_process_id,
active_process=self.active_process).Base_callSafeFunction(MySafeFunction(batch))
signature=sig,
active_process=self.active_process).Base_callSafeFunction(sigint, MySafeFunction(batch))
if joblib_result is None:
joblib_result = CMFActivityResult(self.active_process, sig, callback)
joblib_result = CMFActivityResult(self.active_process, sigint, callback)
return joblib_result
def configure(self, n_jobs=1, parallel=None, **backend_args):
......@@ -138,11 +125,6 @@ if ENABLE_JOBLIB:
def abort_everything(self, ensure_ready=True):
# All jobs will be aborted here while they are still processing our backend
# remove job with no results
#self.active_process.process_result_map = dict((k, v)
# for k, v in self.active_process.process_result_map.iteritems() if v)
transaction.commit()
if ensure_ready:
self.configure(n_jobs=self.parallel.n_jobs, parallel=self.parallel,
**self.parallel._backend_args)
......
......@@ -177,7 +177,6 @@ class Message(BaseMessage):
self.method_id = method_id
self.args = args
self.kw = kw
self.result = None
if getattr(portal_activities, 'activity_creation_trace', False):
# Save current traceback, to make it possible to tell where a message
# was generated.
......@@ -316,12 +315,12 @@ class Message(BaseMessage):
result = method(*self.args, **self.kw)
finally:
setSecurityManager(old_security_manager)
if method is not None:
if self.active_process and result is not None:
self.activateResult(
activity_tool.unrestrictedTraverse(self.active_process),
result, obj)
self.result = result
self.setExecutionState(MESSAGE_EXECUTED)
except:
self.setExecutionState(MESSAGE_NOT_EXECUTED, context=activity_tool)
......@@ -504,7 +503,6 @@ class Method(object):
request=self._request,
portal_activities=portal_activities,
)
if portal_activities.activity_tracking:
activity_tracking_logger.info('queuing message: activity=%s, object_path=%s, method_id=%s, args=%s, kw=%s, activity_kw=%s, user_name=%s' % (self._activity, '/'.join(m.object_path), m.method_id, m.args, m.kw, m.activity_kw, m.user_name))
portal_activities.getActivityBuffer().deferredQueueMessage(
......@@ -1065,6 +1063,7 @@ class ActivityTool (Folder, UniqueObject):
processing_node starts from 1 (there is not node 0)
"""
global active_threads
# return if the number of threads is too high
# else, increase the number of active_threads and continue
tic_lock.acquire()
......
......@@ -20,6 +20,7 @@ CREATE TABLE <dtml-var table> (
`priority` TINYINT NOT NULL DEFAULT 0,
`group_method_id` VARCHAR(255) NOT NULL DEFAULT '',
`tag` VARCHAR(255) NOT NULL,
`signature` VARCHAR(255) NOT NULL,
`serialization_tag` VARCHAR(255) NOT NULL,
`retry` TINYINT UNSIGNED NOT NULL DEFAULT 0,
`message` LONGBLOB NOT NULL,
......
<dtml-comment>
title:
connection_id:cmf_activity_sql_connection
max_rows:0
max_cache:0
cache_time:0
class_name:
class_file:
</dtml-comment>
<params>
processing_node
uid
</params>
UPDATE
message_job
SET
processing_node=<dtml-sqlvar processing_node type="int">
WHERE
<dtml-sqltest uid type="int" multiple>
<dtml-var sql_delimiter>
COMMIT
<dtml-comment>
title:
connection_id:cmf_activity_sql_connection
max_rows:0
max_cache:0
cache_time:0
class_name:
class_file:
</dtml-comment>
<params>
path
method_id
group_method_id
signature
</params>
SELECT uid FROM
message_job
WHERE
processing_node = 0
AND path = <dtml-sqlvar path type="string">
AND method_id = <dtml-sqlvar method_id type="string">
AND group_method_id = <dtml-sqlvar group_method_id type="string">
AND signature = <dtml-sqlvar signature type="string">
FOR UPDATE
......@@ -18,10 +18,11 @@ processing_node
date
group_method_id
tag
signature
serialization_tag
</params>
INSERT INTO <dtml-var table>
(uid, path, active_process_uid, date, method_id, processing_node, processing, priority, group_method_id, tag, serialization_tag, message)
(uid, path, active_process_uid, date, method_id, processing_node, processing, priority, group_method_id, tag, signature, serialization_tag, message)
VALUES
(
<dtml-sqlvar expr="uid" type="int">,
......@@ -34,6 +35,7 @@ VALUES
<dtml-sqlvar expr="priority" type="int">,
<dtml-sqlvar expr="group_method_id" type="string">,
<dtml-sqlvar expr="tag" type="string">,
<dtml-sqlvar expr="signature" type="string">,
<dtml-sqlvar expr="serialization_tag" type="string">,
<dtml-sqlvar expr="message" type="string">
)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment