Commit 8a278cbd authored by Vincent Pelletier's avatar Vincent Pelletier

{ERP5,ZSQL}Catalog: Wrap all documents at once.

Allows reducing the number of user existence check queries from one per
document to one per document batch, reducing database (currently, catalog)
latency effect on indexation time.
Also, avoid object -> id -> object dance for SQLCatalog instance while at
it.
Also, modernise syntax a bit, avoiding intermediate locals mostly.
parent b62e851e
...@@ -69,9 +69,13 @@ ZOPE_SECURITY_SUFFIX = '__roles__' ...@@ -69,9 +69,13 @@ ZOPE_SECURITY_SUFFIX = '__roles__'
SECURITY_QUERY_ARGUMENT_NAME = 'ERP5Catalog_security_query' SECURITY_QUERY_ARGUMENT_NAME = 'ERP5Catalog_security_query'
class IndexableObjectWrapper(object): class IndexableObjectWrapper(object):
__security_parameter_cache = None
__local_role_cache = None
def __init__(self, ob): def __init__(self, ob, user_set, role_dict):
self.__ob = ob self.__ob = ob
self.__user_set = user_set
self.__role_dict = role_dict
def __getattr__(self, name): def __getattr__(self, name):
return getattr(self.__ob, name) return getattr(self.__ob, name)
...@@ -80,16 +84,16 @@ class IndexableObjectWrapper(object): ...@@ -80,16 +84,16 @@ class IndexableObjectWrapper(object):
uid = property(lambda self: self.__ob.getUid(), uid = property(lambda self: self.__ob.getUid(),
lambda self, value: setattr(self.__ob, 'uid', value)) lambda self, value: setattr(self.__ob, 'uid', value))
def _getSecurityParameterList(self): def __getLocalRoleDict(self):
result = self.__dict__.get('_cache_result', None) local_role_dict = self.__local_role_cache
if result is None: if local_role_dict is None:
ob = self.__ob ob = self.__ob
# For each group or user, we have a list of roles, this list # For each group or user, we have a list of roles, this list
# give in this order : [roles on object, roles acquired on the parent, # give in this order : [roles on object, roles acquired on the parent,
# roles acquired on the parent of the parent....] # roles acquired on the parent of the parent....]
# So if we have ['-Author','Author'] we should remove the role 'Author' # So if we have ['-Author','Author'] we should remove the role 'Author'
# but if we have ['Author','-Author'] we have to keep the role 'Author' # but if we have ['Author','-Author'] we have to keep the role 'Author'
localroles = {} local_role_dict = {}
skip_role_set = set() skip_role_set = set()
skip_role = skip_role_set.add skip_role = skip_role_set.add
clear_skip_role = skip_role_set.clear clear_skip_role = skip_role_set.clear
...@@ -103,26 +107,28 @@ class IndexableObjectWrapper(object): ...@@ -103,26 +107,28 @@ class IndexableObjectWrapper(object):
elif role not in skip_role_set: elif role not in skip_role_set:
new_role(role) new_role(role)
if new_role_list: if new_role_list:
localroles[key] = [new_role_list, False] local_role_dict[key] = [new_role_list, False]
self.__local_role_cache = local_role_dict
return local_role_dict
portal = ob.getPortalObject() def _getSecurityGroupIdList(self):
role_dict = dict(portal.portal_catalog.getSQLCatalog().\ """
getSQLCatalogRoleKeysList()) Return the list of security group identifiers this document is
for user_info in portal.acl_users.searchUsers(id=tuple(localroles), exact_match=True): interested in. They may be actual user identifiers or not.
key = user_info['id'] Supposed to be accessed by CatalogTool.
try: """
localroles[key][1] = True return self.__getLocalRoleDict().keys()
except KeyError:
# We found a bug, report it but do not make indexation fail. def _getSecurityParameterList(self):
LOG( result = self.__security_parameter_cache
'CatalogTool.IndexableObjectWrapper', if result is None:
PROBLEM, ob = self.__ob
'searchUser(id=%r, exact_match=True) returned an entry with ' local_role_dict = self.__getLocalRoleDict()
'id=%r. This is very likely a bugin a PAS plugin !' % (
tuple(localroles), role_dict = self.__role_dict
key, user_set = self.__user_set
), for security_group_id, security_group_data in local_role_dict.iteritems():
) security_group_data[1] = security_group_id in user_set
allowed_dict = {} allowed_dict = {}
...@@ -176,7 +182,7 @@ class IndexableObjectWrapper(object): ...@@ -176,7 +182,7 @@ class IndexableObjectWrapper(object):
optimized_role_set.add((user, role)) optimized_role_set.add((user, role))
user_role_dict = {} user_role_dict = {}
user_view_permission_role_dict = {} user_view_permission_role_dict = {}
for user, (roles, user_exists) in localroles.iteritems(): for user, (roles, user_exists) in local_role_dict.iteritems():
prefix = 'user:' + user prefix = 'user:' + user
for role in roles: for role in roles:
is_not_in_optimised_role_set = (user, role) not in optimized_role_set is_not_in_optimised_role_set = (user, role) not in optimized_role_set
...@@ -204,9 +210,11 @@ class IndexableObjectWrapper(object): ...@@ -204,9 +210,11 @@ class IndexableObjectWrapper(object):
sorted_allowed_by_local_roles_group_id[local_roles_group_id] = tuple( sorted_allowed_by_local_roles_group_id[local_roles_group_id] = tuple(
sorted(allowed)) sorted(allowed))
self._cache_result = result = (sorted_allowed_by_local_roles_group_id, self.__security_parameter_cache = result = (
user_role_dict, sorted_allowed_by_local_roles_group_id,
user_view_permission_role_dict) user_role_dict,
user_view_permission_role_dict,
)
return result return result
def getLocalRolesGroupIdDict(self): def getLocalRolesGroupIdDict(self):
...@@ -761,18 +769,24 @@ class CatalogTool (UniqueObject, ZCatalog, CMFCoreCatalogTool, ActiveObject): ...@@ -761,18 +769,24 @@ class CatalogTool (UniqueObject, ZCatalog, CMFCoreCatalogTool, ActiveObject):
""" """
return ZCatalog.countResults(self, REQUEST, **kw) return ZCatalog.countResults(self, REQUEST, **kw)
def wrapObject(self, object, sql_catalog_id=None, **kw): def wrapObjectList(self, object_value_list, catalog_value):
""" """
Return a wrapped object for reindexing. Return a list of wrapped objects for reindexing.
""" """
catalog = self.getSQLCatalog(sql_catalog_id) portal = self.getPortalObject()
if catalog is None:
# Nothing to do. user_set = set()
LOG('wrapObject', 0, 'Warning: catalog is not available') role_dict = dict(catalog_value.getSQLCatalogRoleKeysList())
return (None, None) catalog_security_uid_groups_columns_dict = catalog_value.getSQLCatalogSecurityUidGroupsColumnsDict()
default_security_uid_column = catalog_security_uid_groups_columns_dict['']
document_object = aq_inner(object) getPredicatePropertyDict = catalog_value.getPredicatePropertyDict
w = IndexableObjectWrapper(document_object) security_group_set = set()
wrapper_list = []
for object_value in object_value_list:
document_object = aq_inner(object_value)
w = IndexableObjectWrapper(document_object, user_set, role_dict)
w.predicate_property_dict = getPredicatePropertyDict(object_value) or {}
security_group_set.update(w._getSecurityGroupIdList())
# Find the parent definition for security # Find the parent definition for security
is_acquired = 0 is_acquired = 0
...@@ -787,37 +801,43 @@ class CatalogTool (UniqueObject, ZCatalog, CMFCoreCatalogTool, ActiveObject): ...@@ -787,37 +801,43 @@ class CatalogTool (UniqueObject, ZCatalog, CMFCoreCatalogTool, ActiveObject):
else: else:
break break
if is_acquired: if is_acquired:
document_w = IndexableObjectWrapper(document_object) document_w = IndexableObjectWrapper(document_object, user_set, role_dict)
security_group_set.update(document_w._getSecurityGroupIdList())
else: else:
document_w = w document_w = w
wrapper_list.append((document_object, w, document_w))
# Note: we mutate the set, so all related wrappers get (purposedly)
# affected by this, which must happen before _getSecurityParameterList
# is called (which happens when calling getSecurityUidDict below).
user_set.update(
x['id'] for x in portal.acl_users.searchUsers(
id=list(security_group_set),
exact_match=True,
)
)
(security_uid_dict, optimised_roles_and_users) = \ getSecurityUidDict = catalog_value.getSecurityUidDict
catalog.getSecurityUidDict(document_w) getSubjectSetUid = catalog_value.getSubjectSetUid
wrapped_object_list = []
for (document_object, w, document_w) in wrapper_list:
w.optimised_roles_and_users = optimised_roles_and_users (
security_uid_dict,
catalog_security_uid_groups_columns_dict = \ w.optimised_roles_and_users,
catalog.getSQLCatalogSecurityUidGroupsColumnsDict() ) = getSecurityUidDict(document_w)
default_security_uid_column = catalog_security_uid_groups_columns_dict[''] for local_roles_group_id, security_uid in security_uid_dict.iteritems():
for local_roles_group_id, security_uid in security_uid_dict.items():
catalog_column = catalog_security_uid_groups_columns_dict.get( catalog_column = catalog_security_uid_groups_columns_dict.get(
local_roles_group_id, default_security_uid_column) local_roles_group_id,
default_security_uid_column,
)
setattr(w, catalog_column, security_uid) setattr(w, catalog_column, security_uid)
(
w.subject_set_uid,
w.optimised_subject_list,
) = getSubjectSetUid(document_w)
# XXX we should build vars begore building the wrapper wrapped_object_list.append(ImplicitAcquisitionWrapper(w, aq_parent(document_object)))
return wrapped_object_list
predicate_property_dict = catalog.getPredicatePropertyDict(object)
if predicate_property_dict is not None:
w.predicate_property_dict = predicate_property_dict
else:
w.predicate_property_dict = {}
(subject_set_uid, optimised_subject_list) = catalog.getSubjectSetUid(document_w)
w.optimised_subject_list = optimised_subject_list
w.subject_set_uid = subject_set_uid
return ImplicitAcquisitionWrapper(w, aq_parent(document_object))
security.declarePrivate('reindexObject') security.declarePrivate('reindexObject')
def reindexObject(self, object, idxs=None, sql_catalog_id=None,**kw): def reindexObject(self, object, idxs=None, sql_catalog_id=None,**kw):
......
...@@ -692,15 +692,14 @@ class ZCatalog(Folder, Persistent, Implicit): ...@@ -692,15 +692,14 @@ class ZCatalog(Folder, Persistent, Implicit):
""" """
return [] return []
security.declarePrivate('wrapObject') security.declarePrivate('wrapObjectList')
def wrapObject(self, object, **kw): def wrapObjectList(self, object_value_list, catalog_value):
""" """
Return a wrapped object for reindexing. Return a list of wrapped objects for reindexing.
This method should be overridden if necessary. This method should be overridden if necessary.
""" """
#LOG('ZSQLCatalog wrapObject', 0, 'object = %r, kw = %r' % (object, kw)) return object_list
return object
security.declareProtected(manage_zcatalog_entries, 'catalog_object') security.declareProtected(manage_zcatalog_entries, 'catalog_object')
def catalog_object(self, obj, url=None, idxs=[], is_object_moved=0, sql_catalog_id=None, **kw): def catalog_object(self, obj, url=None, idxs=[], is_object_moved=0, sql_catalog_id=None, **kw):
...@@ -717,7 +716,6 @@ class ZCatalog(Folder, Persistent, Implicit): ...@@ -717,7 +716,6 @@ class ZCatalog(Folder, Persistent, Implicit):
(catalog is not None) and \ (catalog is not None) and \
(self.source_sql_catalog_id == catalog.id) (self.source_sql_catalog_id == catalog.id)
archiving = self.archive_path is not None archiving = self.archive_path is not None
wrapped_object_list = []
failed_object_list = [] failed_object_list = []
url_list = [] url_list = []
archive_list = [] archive_list = []
...@@ -757,6 +755,7 @@ class ZCatalog(Folder, Persistent, Implicit): ...@@ -757,6 +755,7 @@ class ZCatalog(Folder, Persistent, Implicit):
default_catalog = self.getSQLCatalog() default_catalog = self.getSQLCatalog()
# Construct list of object to catalogged # Construct list of object to catalogged
current_catalog_object_list = []
for obj in object_list: for obj in object_list:
if hot_reindexing: if hot_reindexing:
try: try:
...@@ -794,10 +793,7 @@ class ZCatalog(Folder, Persistent, Implicit): ...@@ -794,10 +793,7 @@ class ZCatalog(Folder, Persistent, Implicit):
goto_current_catalog = 1 goto_current_catalog = 1
if goto_current_catalog: if goto_current_catalog:
# wrap object only when sure it will be reindex now current_catalog_object_list.append(obj)
# thus security uid is also reindex
wrap_obj = self.wrapObject(obj, sql_catalog_id=sql_catalog_id)
wrapped_object_list.append(wrap_obj)
# run activity or execute for each archive depending on priority # run activity or execute for each archive depending on priority
if catalog_dict: if catalog_dict:
...@@ -806,49 +802,56 @@ class ZCatalog(Folder, Persistent, Implicit): ...@@ -806,49 +802,56 @@ class ZCatalog(Folder, Persistent, Implicit):
# if we reindex in current catalog, do not relaunch an activity for this # if we reindex in current catalog, do not relaunch an activity for this
continue continue
d = catalog_dict[catalog_id] d = catalog_dict[catalog_id]
# build the wrapped object list
wrapped_object_list_2 = []
for obj in d['obj']:
try:
wrap_obj = self.wrapObject(obj, sql_catalog_id=catalog_id)
wrapped_object_list_2.append(wrap_obj)
except ConflictError:
raise
except:
LOG('WARNING ZSQLCatalog', 0, 'wrapObject failed on the object %r' % (obj,), error=sys.exc_info())
failed_object_list.append(obj)
# hot_reindexing is True when creating an object during a hot reindex, in this case, we don't want # hot_reindexing is True when creating an object during a hot reindex, in this case, we don't want
# to reindex it in destination catalog, it will be recorded an play only once # to reindex it in destination catalog, it will be recorded an play only once
if not hot_reindexing and self.hot_reindexing_state != HOT_REINDEXING_DOUBLE_INDEXING_STATE and \ if not hot_reindexing and self.hot_reindexing_state != HOT_REINDEXING_DOUBLE_INDEXING_STATE and \
self.destination_sql_catalog_id == catalog_id: self.destination_sql_catalog_id == catalog_id:
destination_catalog = self.getSQLCatalog(self.destination_sql_catalog_id) destination_catalog = self.getSQLCatalog(self.destination_sql_catalog_id)
# reindex objects in destination catalog # reindex objects in destination catalog
destination_catalog.catalogObjectList(wrapped_object_list_2, **kw) destination_catalog.catalogObjectList(
self.wrapObjectList(
object_value_list=d['obj'],
catalog_value=destination_catalog,
),
**kw
)
else: else:
archive_catalog = self.getSQLCatalog(catalog_id) archive_catalog = self.getSQLCatalog(catalog_id)
if immediate_reindex_archive: if immediate_reindex_archive:
archive_catalog.catalogObjectList(wrapped_object_list_2, **kw) archive_catalog.catalogObjectList(
self.wrapObjectList(
object_value_list=d['obj'],
catalog_value=archive_catalog,
),
**kw
)
else: else:
for obj in d['obj']: for obj in d['obj']:
obj._reindexObject(sql_catalog_id=catalog_id, activate_kw = \ obj._reindexObject(sql_catalog_id=catalog_id, activate_kw = \
{'priority': d['priority']}, disable_archive=1, **kw) {'priority': d['priority']}, disable_archive=1, **kw)
if catalog is not None: if catalog is not None:
if wrapped_object_list: if current_catalog_object_list:
catalog.catalogObjectList(wrapped_object_list, **kw) catalog.catalogObjectList(
self.wrapObjectList(
object_value_list=current_catalog_object_list,
catalog_value=catalog,
),
**kw
)
if hot_reindexing: if hot_reindexing:
destination_catalog = self.getSQLCatalog(self.destination_sql_catalog_id) destination_catalog = self.getSQLCatalog(self.destination_sql_catalog_id)
if destination_catalog.id != catalog.id: if destination_catalog.id != catalog.id:
if self.hot_reindexing_state == HOT_REINDEXING_RECORDING_STATE: if self.hot_reindexing_state == HOT_REINDEXING_RECORDING_STATE:
destination_catalog.recordObjectList(url_list, 1) destination_catalog.recordObjectList(url_list, 1)
else: elif object_list:
wrapped_destination_object_list = [] destination_catalog.catalogObjectList(
for obj in object_list: self.wrapObjectList(
wrap_obj = self.wrapObject(obj, sql_catalog_id=self.destination_sql_catalog_id) object_value_list=object_list,
wrapped_destination_object_list.append(wrap_obj) catalog_value=destination_catalog,
if wrapped_destination_object_list: ),
destination_catalog.catalogObjectList(wrapped_destination_object_list,**kw) **kw
)
object_list[:] = failed_object_list object_list[:] = failed_object_list
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment