Commit a483a51d authored by Yoshinori Okuji's avatar Yoshinori Okuji

Split object_list in catalogObjectList to save memory. Remove...

Split object_list in catalogObjectList to save memory. Remove expression_cache, as it is no good. Stop importing unused symbols.

git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@14058 20353a03-c40f-0410-a6d1-a30d3c3de9de
parent d69ba952
...@@ -18,19 +18,18 @@ import ExtensionClass ...@@ -18,19 +18,18 @@ import ExtensionClass
import Globals import Globals
import OFS.History import OFS.History
from Globals import DTMLFile, PersistentMapping from Globals import DTMLFile, PersistentMapping
from string import lower, split, join from string import split, join
from thread import allocate_lock, get_ident from thread import allocate_lock, get_ident
from OFS.Folder import Folder from OFS.Folder import Folder
from AccessControl import ClassSecurityInfo, getSecurityManager from AccessControl import ClassSecurityInfo
from BTrees.OIBTree import OIBTree from BTrees.OIBTree import OIBTree
from App.config import getConfiguration from App.config import getConfiguration
from BTrees.Length import Length from BTrees.Length import Length
from Shared.DC.ZRDB.TM import TM from Shared.DC.ZRDB.TM import TM
from DateTime import DateTime from DateTime import DateTime
from Products.PluginIndexes.common.randid import randid from Acquisition import aq_parent, aq_inner, aq_base
from Acquisition import aq_parent, aq_inner, aq_base, aq_self from zLOG import LOG, WARNING, INFO, TRACE
from zLOG import LOG, WARNING, INFO, TRACE, DEBUG, ERROR
from ZODB.POSException import ConflictError from ZODB.POSException import ConflictError
from DocumentTemplate.DT_Var import sql_quote from DocumentTemplate.DT_Var import sql_quote
from Products.PythonScripts.Utility import allow_class from Products.PythonScripts.Utility import allow_class
...@@ -41,15 +40,12 @@ import urllib ...@@ -41,15 +40,12 @@ import urllib
import string import string
import pprint import pprint
from cStringIO import StringIO from cStringIO import StringIO
from xml.dom.minidom import parse, parseString, getDOMImplementation from xml.dom.minidom import parse
from xml.sax.saxutils import escape, quoteattr from xml.sax.saxutils import escape, quoteattr
import os import os
import md5 import md5
import threading
import types
try: try:
from Products.PageTemplates.Expressions import SecureModuleImporter
from Products.CMFCore.Expression import Expression from Products.CMFCore.Expression import Expression
from Products.PageTemplates.Expressions import getEngine from Products.PageTemplates.Expressions import getEngine
from Products.CMFCore.utils import getToolByName from Products.CMFCore.utils import getToolByName
...@@ -401,7 +397,7 @@ class Query(QueryMixin): ...@@ -401,7 +397,7 @@ class Query(QueryMixin):
else: else:
where_expression = '(%s)' % (' %s ' % self.getOperator()).join(where_expression) where_expression = '(%s)' % (' %s ' % self.getOperator()).join(where_expression)
else: else:
where_expression = 'True' # It is better to have a valid default where_expression = '1' # It is better to have a valid default
return {'where_expression':where_expression, return {'where_expression':where_expression,
'select_expression_list':select_expression} 'select_expression_list':select_expression}
...@@ -1292,7 +1288,7 @@ class Catalog( Folder, ...@@ -1292,7 +1288,7 @@ class Catalog( Folder,
def manage_catalogObject(self, REQUEST, RESPONSE, URL1, urls=None): def manage_catalogObject(self, REQUEST, RESPONSE, URL1, urls=None):
""" index Zope object(s) that 'urls' point to """ """ index Zope object(s) that 'urls' point to """
if urls: if urls:
if isinstance(urls, types.StringType): if isinstance(urls, str):
urls=(urls,) urls=(urls,)
for url in urls: for url in urls:
...@@ -1308,7 +1304,7 @@ class Catalog( Folder, ...@@ -1308,7 +1304,7 @@ class Catalog( Folder,
""" removes Zope object(s) 'urls' from catalog """ """ removes Zope object(s) 'urls' from catalog """
if urls: if urls:
if isinstance(urls, types.StringType): if isinstance(urls, str):
urls=(urls,) urls=(urls,)
for url in urls: for url in urls:
...@@ -1386,49 +1382,61 @@ class Catalog( Folder, ...@@ -1386,49 +1382,61 @@ class Catalog( Folder,
urllib.quote('Catalog Updated<br>Total time: %s<br>Total CPU time: %s' % (`elapse`, `c_elapse`))) urllib.quote('Catalog Updated<br>Total time: %s<br>Total CPU time: %s' % (`elapse`, `c_elapse`)))
def catalogObject(self, object, path, is_object_moved=0): def catalogObject(self, object, path, is_object_moved=0):
""" """Add an object to the Catalog by calling all SQL methods and
Adds an object to the Catalog by calling providing needed arguments.
all SQL methods and providing needed arguments.
'object' is the object to be cataloged
'uid' is the unique Catalog identifier for this object
""" 'object' is the object to be catalogged."""
self.catalogObjectList([object]) self._catalogObjectList([object])
def catalogObjectList(self, object_list, method_id_list=None, disable_cache=0, def catalogObjectList(self, object_list, method_id_list=None,
check_uid=1, idxs=None): disable_cache=0, check_uid=1, idxs=None):
""" """Add objects to the Catalog by calling all SQL methods and
Add objects to the Catalog by calling providing needed arguments.
all SQL methods and providing needed arguments.
method_id_list : specify which method should be used. If not method_id_list : specify which methods should be used. If not
set it will take the default value of portal_catalog set, it will take the default value of portal_catalog.
disable_cache : do not use cache, so values will be computed each time, disable_cache : do not use cache, so values will be computed each time,
only usefull in some particular cases, most of the time only useful in some particular cases, most of the time
you don't need to use it you don't need to use it.
Each element of 'object_list' is an object to be cataloged Each element of 'object_list' is an object to be catalogged.
'uid' is the unique Catalog identifier for this object 'uid' is the unique Catalog identifier for this object.
WARNING: This method assumes that currently all objects are being reindexed from scratch. Note that this method calls _catalogObjectList with fragments of
the object list, because calling _catalogObjectList with too many
XXX: For now newUid is used to allocated UIDs. Is this good? Is it better to INSERT then SELECT? objects at a time bloats the process's memory consumption, due to
""" caching."""
# XXX 300 is arbitrary.
for i in xrange(0, len(object_list), 300):
self._catalogObjectList(object_list[i:i+300],
method_id_list=method_id_list,
disable_cache=disable_cache,
check_uid=check_uid,
idxs=idxs)
def _catalogObjectList(self, object_list, method_id_list=None,
disable_cache=0, check_uid=1, idxs=None):
"""This is the real method to catalog objects.
XXX: For now newUid is used to allocated UIDs. Is this good?
Is it better to INSERT then SELECT?"""
LOG('SQLCatalog', TRACE, 'catalogging %d objects' % len(object_list)) LOG('SQLCatalog', TRACE, 'catalogging %d objects' % len(object_list))
#LOG('catalogObjectList', 0, 'called with %r' % (object_list,)) #LOG('catalogObjectList', 0, 'called with %r' % (object_list,))
if idxs not in (None, []): if idxs not in (None, []):
LOG('ZSLQCatalog.SQLCatalog:catalogObjectList', 0, 'Warning: idxs is ignored in this function and is only provided to be compatible with CMFCatalogAware.reindexObject.') LOG('ZSLQCatalog.SQLCatalog:catalogObjectList', WARNING,
'idxs is ignored in this function and is only provided to be compatible with CMFCatalogAware.reindexObject.')
if not self.isIndexable(): if not self.isIndexable():
return None return None
site_root = self.getSiteRoot() site_root = self.getSiteRoot()
# Reminder about optimization: This below calls one or two small SQL
# queries for every object. This is extremely inefficient. It is
# far more efficient to send one or two queries for all objects.
for object in object_list: for object in object_list:
path = object.getPath() path = object.getPath()
if not getattr(aq_base(object), 'uid', None): if not getattr(aq_base(object), 'uid', None):
...@@ -1493,7 +1501,6 @@ class Catalog( Folder, ...@@ -1493,7 +1501,6 @@ class Catalog( Folder,
method_id_list = self.sql_catalog_object_list method_id_list = self.sql_catalog_object_list
econtext_cache = {} econtext_cache = {}
argument_cache = {} argument_cache = {}
expression_cache = {}
try: try:
if not disable_cache: if not disable_cache:
...@@ -1514,15 +1521,12 @@ class Catalog( Folder, ...@@ -1514,15 +1521,12 @@ class Catalog( Folder,
if type_list and portal_type not in type_list: if type_list and portal_type not in type_list:
continue continue
elif expression is not None: elif expression is not None:
expression_key = (object.uid, self.filter_dict[method_name]['expression'])
try:
result = expression_cache[expression_key]
except KeyError:
try: try:
econtext = econtext_cache[object.uid] econtext = econtext_cache[object.uid]
except KeyError: except KeyError:
econtext = econtext_cache[object.uid] = self.getExpressionContext(object) econtext = self.getExpressionContext(object)
result = expression_cache[expression_key] = expression(econtext) econtext_cache[object.uid] = econtext
result = expression(econtext)
if not result: if not result:
continue continue
catalogged_object_list.append(object) catalogged_object_list.append(object)
...@@ -1536,7 +1540,7 @@ class Catalog( Folder, ...@@ -1536,7 +1540,7 @@ class Catalog( Folder,
#LOG('catalogObjectList', 0, 'method_name = %s' % (method_name,)) #LOG('catalogObjectList', 0, 'method_name = %s' % (method_name,))
method = getattr(self, method_name) method = getattr(self, method_name)
if method.meta_type in ["Z SQL Method", "LDIF Method"]: if method.meta_type in ("Z SQL Method", "LDIF Method"):
# Build the dictionnary of values # Build the dictionnary of values
arguments = method.arguments_src arguments = method.arguments_src
for arg in split(arguments): for arg in split(arguments):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment