Commit 2f2ef92f authored by Andreas Jung

     - PathIndex and TopicIndex are now using a counter for the number
       of indexed objects instead of using a very expensive calculation
       based on the keys of their indexes.
parent 5dfe4e63
......@@ -13,6 +13,10 @@ Zope Changes
Bugs fixed
- PathIndex and TopicIndex are now using a counter for the number
of indexed objects instead of using a very expensive calculation
based on the keys of their indexes.
- backport of TAL fixes from z3
* i18n and metal interactions
......
......@@ -11,26 +11,26 @@
#
##############################################################################
__version__ = '$Id: PathIndex.py,v 1.35 2003/06/17 19:01:07 sidnei Exp $'
__version__ = '$Id: PathIndex.py,v 1.36 2003/08/16 16:44:42 andreasjung Exp $'
from Products.PluginIndexes import PluggableIndex
from Products.PluginIndexes.common.util import parseIndexRequest
from Products.PluginIndexes.common import safe_callable
import warnings
from types import StringType, ListType, TupleType
from Globals import Persistent, DTMLFile
from Acquisition import Implicit
from OFS.SimpleItem import SimpleItem
from BTrees.IOBTree import IOBTree
from BTrees.OOBTree import OOBTree
from BTrees.IIBTree import IITreeSet, IISet, intersection, union
from OFS.SimpleItem import SimpleItem
from BTrees.Length import Length
from zLOG import LOG, ERROR
from types import StringType, ListType, TupleType
import warnings
from Products.PluginIndexes import PluggableIndex
from Products.PluginIndexes.common.util import parseIndexRequest
from Products.PluginIndexes.common import safe_callable
_marker = []
class PathIndex(Persistent, Implicit, SimpleItem):
class PathIndex(Persistent, SimpleItem):
""" A path index stores all path components of the physical
path of an object:
......@@ -41,7 +41,7 @@ class PathIndex(Persistent, Implicit, SimpleItem):
- every component is kept as a key of a OOBTree in self._indexes
- the value is a mapping 'level of the path component' to
'all documentIds with this path component on this level'
'all docids with this path component on this level'
"""
......@@ -55,33 +55,26 @@ class PathIndex(Persistent, Implicit, SimpleItem):
'help': ('PathIndex','PathIndex_Settings.stx')},
)
query_options = ["query", "level", "operator"]
query_options = ("query", "level", "operator")
def __init__(self,id,caller=None):
self.id = id
# experimental code for specifying the operator
self.operators = ['or','and']
self.operators = ('or','and')
self.useOperator = 'or'
self.clear()
def clear(self):
""" clear everything """
self._depth = 0
self._index = OOBTree()
self._depth = 0
self._index = OOBTree()
self._unindex = IOBTree()
self._length = Length(0)
def insertEntry(self, comp, id, level):
"""Insert an entry.
comp is a path component (generated by splitPath() )
id is the documentId
level is the level of the component inside the path
comp is a path component
id is the docid
level is the level of the component inside the path
"""
if not self._index.has_key(comp):
......@@ -94,16 +87,11 @@ class PathIndex(Persistent, Implicit, SimpleItem):
if level > self._depth:
self._depth = level
def index_object(self, documentId, obj ,threshold=100):
def index_object(self, docid, obj ,threshold=100):
""" hook for (Z)Catalog """
# first we check if the object provide an attribute or
# method to be used as hook for the PathIndex
if hasattr(obj, self.id):
f = getattr(obj, self.id)
f = getattr(obj, self.id, None)
if f is not None:
if safe_callable(f):
try:
path = f()
......@@ -112,8 +100,7 @@ class PathIndex(Persistent, Implicit, SimpleItem):
else:
path = f
if not (isinstance(path, StringType) or
isinstance(path, TupleType)):
if not isinstance(path, (StringType, TupleType)):
raise TypeError('path value must be string or tuple of strings')
else:
try:
......@@ -121,36 +108,34 @@ class PathIndex(Persistent, Implicit, SimpleItem):
except AttributeError:
return 0
if type(path) in (ListType, TupleType):
if isinstance(path, (ListType, TupleType)):
path = '/'+ '/'.join(path[1:])
comps = self.splitPath(path, obj)
comps = filter(None, path.split('/'))
if not self._unindex.has_key(docid):
self._length.change(1)
for i in range(len(comps)):
self.insertEntry(comps[i], documentId, i)
self._unindex[documentId] = path
self.insertEntry(comps[i], docid, i)
self._unindex[docid] = path
return 1
def unindex_object(self, documentId):
def unindex_object(self, docid):
""" hook for (Z)Catalog """
if not self._unindex.has_key(documentId):
if not self._unindex.has_key(docid):
LOG(self.__class__.__name__, ERROR,
'Attempt to unindex nonexistent document'
' with id %s' % documentId)
' with id %s' % docid)
return
path = self._unindex[documentId]
comps = path.split('/')
comps = self._unindex[docid].split('/')
for level in range(len(comps[1:])):
comp = comps[level+1]
try:
self._index[comp][level].remove(documentId)
self._index[comp][level].remove(docid)
if not self._index[comp][level]:
del self._index[comp][level]
......@@ -160,34 +145,10 @@ class PathIndex(Persistent, Implicit, SimpleItem):
except KeyError:
LOG(self.__class__.__name__, ERROR,
'Attempt to unindex document'
' with id %s failed' % documentId)
del self._unindex[documentId]
def printIndex(self):
for k,v in self._index.items():
print "-"*78
print k
for k1,v1 in v.items():
print k1,v1,
print
def splitPath(self, path, obj=None):
""" split physical path of object. If the object has
as function splitPath() we use this user-defined function
to split the path
"""
if hasattr(obj, "splitPath"):
comps = obj.splitPath(path)
else:
comps = filter(None, path.split('/'))
return comps
' with id %s failed' % docid)
self._length.change(-1)
del self._unindex[docid]
def search(self, path, default_level=0):
"""
......@@ -199,95 +160,61 @@ class PathIndex(Persistent, Implicit, SimpleItem):
level < 0 not implemented yet
"""
if isinstance(path,StringType):
if isinstance(path, StringType):
level = default_level
else:
level = int(path[1])
path = path[0]
comps = self.splitPath(path)
comps = filter(None, path.split('/'))
if len(comps) == 0:
return IISet(self._unindex.keys())
if level >= 0:
results = []
for i in range(len(comps)):
comp = comps[i]
if not self._index.has_key(comp): return IISet()
if not self._index[comp].has_key(level+i): return IISet()
results.append( self._index[comp][level+i] )
res = results[0]
for i in range(1,len(results)):
res = intersection(res,results[i])
return res
else:
results = IISet()
for level in range(0,self._depth + 1):
ids = None
error = 0
for cn in range(0,len(comps)):
comp = comps[cn]
try:
ids = intersection(ids,self._index[comp][level+cn])
except KeyError:
error = 1
if error==0:
results = union(results,ids)
return results
def __len__(self):
""" len """
# XXX REALLY inefficient
return len(self._index)
def numObjects(self):
""" return the number of indexed objects"""
# XXX REALLY inefficient
return len(self._unindex)
def keys(self):
""" return list of all path components """
# XXX Could this be lazy, does it need to be a list?
return list(self._index.keys())
def values(self):
# XXX Could this be lazy, does it need to be a list?
return list(self._index.values())
def items(self):
""" mapping path components : documentIds """
# XXX Could this be lazy, does it need to be a list?
return list(self._index.items())
try:
return self._length()
except AttributeError: # backward compatibility
l = len(self._unindex)
self._length = Length(l)
return l
def _apply_index(self, request, cid=''):
""" hook for (Z)Catalog
request mapping type (usually {"path": "..." }
additionaly a parameter "path_level" might be passed
to specify the level (see search())
'request' -- mapping type (usually {"path": "..." }
additionally a parameter "path_level" might be passed
to specify the level (see search())
cid ???
'cid' -- ???
"""
record = parseIndexRequest(request,self.id,self.query_options)
......@@ -299,19 +226,14 @@ class PathIndex(Persistent, Implicit, SimpleItem):
"Please use a mapping object and the "
"'level' key to specify the operator." % cid)
# get the level parameter
level = record.get("level",0)
# experimental code for specifying the operator
operator = record.get('operator',self.useOperator).lower()
# depending on the operator we use intersection or union
if operator=="or": set_func = union
else: set_func = intersection
if operator == "or": set_func = union
else: set_func = intersection
res = None
for k in record.keys:
rows = self.search(k,level)
res = set_func(res,rows)
......@@ -325,27 +247,24 @@ class PathIndex(Persistent, Implicit, SimpleItem):
"""has unique values for column name"""
return name == self.id
def uniqueValues(self, name=None, withLength=0):
""" needed to be consistent with the interface """
return self._index.keys()
def getIndexSourceNames(self):
""" return names of indexed attributes """
return ('getPhysicalPath', )
def getEntryForObject(self, documentId, default=_marker):
""" Takes a document ID and returns all the information we have
on that specific object. """
def getEntryForObject(self, docid, default=_marker):
""" Takes a document ID and returns all the information
we have on that specific object.
"""
try:
return self._unindex[documentId]
return self._unindex[docid]
except KeyError:
# XXX Why is default ignored?
return None
index_html = DTMLFile('dtml/index', globals())
manage_workspace = DTMLFile('dtml/managePathIndex', globals())
......
......@@ -15,17 +15,16 @@ import os, sys, unittest
from Products.PluginIndexes.PathIndex.PathIndex import PathIndex
class Dummy:
meta_type="foo"
def __init__( self, path):
self.path = path
def getPhysicalPath(self):
return self.path.split('/')
def __str__( self ):
return '<Dummy: %s>' % self.path
......@@ -62,25 +61,35 @@ class TestCase( unittest.TestCase ):
self._index.index_object( k, v )
def testEmpty(self):
assert len( self._index ) == 0
assert self._index.getEntryForObject( 1234 ) is None
self.assertEqual(self._index.numObjects() ,0)
self.assertEqual(self._index.getEntryForObject(1234), None)
self._index.unindex_object( 1234 ) # nothrow
assert self._index._apply_index( {"suxpath":"xxx"} ) is None
self.assertEqual(self._index._apply_index({"suxpath":"xxx"}), None)
def testUnIndex(self):
self._populateIndex()
self.assertEqual(self._index.numObjects(), 18)
for k in self._values.keys():
self._index.unindex_object(k)
assert len(self._index._index)==0
assert len(self._index._unindex)==0
self.assertEqual(self._index.numObjects(), 0)
self.assertEqual(len(self._index._index), 0)
self.assertEqual(len(self._index._unindex), 0)
def testReindex(self):
self._populateIndex()
self.assertEqual(self._index.numObjects(), 18)
o = Dummy('/foo/bar')
self._index.index_object(19, o)
self.assertEqual(self._index.numObjects(), 19)
self._index.index_object(19, o)
self.assertEqual(self._index.numObjects(), 19)
def testUnIndexError(self):
self._populateIndex()
# this should not raise an error
self._index.unindex_object(-1)
......@@ -91,10 +100,7 @@ class TestCase( unittest.TestCase ):
def testRoot(self):
self._populateIndex()
tests = [
("/",0, range(1,19)),
]
tests = ( ("/",0, range(1,19)), )
for comp,level,results in tests:
for path in [comp,"/"+comp,"/"+comp+"/"]:
......@@ -110,14 +116,10 @@ class TestCase( unittest.TestCase ):
lst = list(res[0].keys())
self.assertEqual(lst,results)
def testRoot(self):
self._populateIndex()
tests = [
("/",0, range(1,19)),
]
tests = ( ("/",0, range(1,19)), )
for comp,level,results in tests:
for path in [comp,"/"+comp,"/"+comp+"/"]:
......@@ -137,7 +139,6 @@ class TestCase( unittest.TestCase ):
def testSimpleTests(self):
self._populateIndex()
tests = [
("aa", 0, [1,2,3,4,5,6,7,8,9]),
("aa", 1, [1,2,3,10,11,12] ),
......@@ -172,7 +173,6 @@ class TestCase( unittest.TestCase ):
def testComplexOrTests(self):
self._populateIndex()
tests = [
(['aa','bb'],1,[1,2,3,4,5,6,10,11,12,13,14,15]),
(['aa','bb','xx'],1,[1,2,3,4,5,6,10,11,12,13,14,15]),
......@@ -189,7 +189,6 @@ class TestCase( unittest.TestCase ):
def testComplexANDTests(self):
self._populateIndex()
tests = [
(['aa','bb'],1,[]),
([('aa',0),('bb',1)],0,[4,5,6]),
......@@ -197,7 +196,6 @@ class TestCase( unittest.TestCase ):
]
for lst ,level,results in tests:
res = self._index._apply_index(
{"path":{'query':lst,"level":level,"operator":"and"}})
lst = list(res[0].keys())
......
......@@ -11,32 +11,32 @@
#
##############################################################################
__version__ = '$Id: TopicIndex.py,v 1.13 2003/06/23 08:45:58 andreasjung Exp $'
from Products.PluginIndexes import PluggableIndex
from Products.PluginIndexes.common.util import parseIndexRequest
__version__ = '$Id: TopicIndex.py,v 1.14 2003/08/16 16:44:48 andreasjung Exp $'
from Globals import Persistent, DTMLFile
from OFS.SimpleItem import SimpleItem
from Acquisition import Implicit
from zLOG import ERROR, LOG
from BTrees.OOBTree import OOBTree
from BTrees.IIBTree import IISet,intersection,union
from zLOG import ERROR, LOG
import FilteredSet
from Products.PluginIndexes import PluggableIndex
from Products.PluginIndexes.common.util import parseIndexRequest
_marker = []
class TopicIndex(Persistent, Implicit, SimpleItem):
class TopicIndex(Persistent, SimpleItem):
""" A TopicIndex maintains a set of FilteredSet objects.
Every FilteredSet object consists of an expression and
and IISet with all Ids of indexed objects that eval with
this expression to 1.
Every FilteredSet object consists of an expression and
an IISet with all Ids of indexed objects that eval with
this expression to 1.
"""
__implements__ = (PluggableIndex.PluggableIndexInterface,)
meta_type="TopicIndex"
query_options = ('query','operator')
manage_options= (
{'label': 'FilteredSets',
......@@ -44,93 +44,59 @@ class TopicIndex(Persistent, Implicit, SimpleItem):
'help': ('TopicIndex','TopicIndex_searchResults.stx')},
)
manage_workspace = DTMLFile('dtml/manageTopicIndex',globals())
query_options = ('query','operator')
def __init__(self,id,caller=None):
self.id = id
self.filteredSets = OOBTree()
# experimental code for specifing the operator
self.operators = ('or','and')
self.id = id
self.filteredSets = OOBTree()
self.operators = ('or','and')
self.defaultOperator = 'or'
def getId(self): return self.id
def clear(self):
""" clear everything """
for fs in self.filteredSets.values():
fs.clear()
def index_object(self, documentId, obj ,threshold=100):
def index_object(self, docid, obj ,threshold=100):
""" hook for (Z)Catalog """
for fid, filteredSet in self.filteredSets.items():
filteredSet.index_object(documentId,obj)
filteredSet.index_object(docid,obj)
return 1
def unindex_object(self,documentId):
def unindex_object(self,docid):
""" hook for (Z)Catalog """
for fs in self.filteredSets.values():
try:
fs.unindex_object(documentId)
fs.unindex_object(docid)
except KeyError:
LOG(self.__class__.__name__, ERROR,
'Attempt to unindex document'
' with id %s failed' % documentId)
' with id %s failed' % docid)
return 1
def __len__(self):
""" len """
n=0
for fs in self.filteredSets.values():
n = n + len(fs.getIds())
return n
def numObjects(self):
return "N/A"
def keys(self): pass
def values(self): pass
def items(self): pass
def search(self,filterId):
if self.filteredSets.has_key(filterId):
return self.filteredSets[filterId].getIds()
return "n/a"
def search(self,filter_id):
if self.filteredSets.has_key(filter_id):
return self.filteredSets[filter_id].getIds()
def _apply_index(self, request, cid=''):
""" hook for (Z)Catalog
request mapping type (usually {"topic": "..." }
cid ???
'request' -- mapping type (usually {"topic": "..." }
'cid' -- ???
"""
record = parseIndexRequest(request,self.id,self.query_options)
if record.keys==None: return None
if record.keys is None: return None
# experimental code for specifing the operator
operator = record.get('operator',self.defaultOperator).lower()
# depending on the operator we use intersection of union
if operator=="or": set_func = union
else: set_func = intersection
operator = record.get('operator', self.defaultOperator).lower()
if operator == 'or': set_func = union
else: set_func = intersection
res = None
for filterId in record.keys:
rows = self.search(filterId)
for filter_id in record.keys:
rows = self.search(filter_id)
res = set_func(res,rows)
if res:
......@@ -138,79 +104,65 @@ class TopicIndex(Persistent, Implicit, SimpleItem):
else:
return IISet(), (self.id,)
def uniqueValues(self,name=None,withLength=0):
def uniqueValues(self,name=None, withLength=0):
""" needed to be consistent with the interface """
return self.filteredSets.keys()
def getEntryForObject(self,documentId,default=_marker):
def getEntryForObject(self,docid, default=_marker):
""" Takes a document ID and returns all the information we have
on that specific object. """
on that specific object.
"""
return self.filteredSets.keys()
def addFilteredSet(self, filter_id, typeFilteredSet, expr):
def addFilteredSet(self, filterId, typeFilteredSet, expr):
if self.filteredSets.has_key(filterId):
if self.filteredSets.has_key(filter_id):
raise KeyError,\
'A FilteredSet with this name already exists: %s' % filterId
self.filteredSets[filterId] = \
FilteredSet.factory(filterId, typeFilteredSet, expr)
'A FilteredSet with this name already exists: %s' % filter_id
self.filteredSets[filter_id] = \
FilteredSet.factory(filter_id, typeFilteredSet, expr)
def delFilteredSet(self,filterId):
if not self.filteredSets.has_key(filterId):
def delFilteredSet(self,filter_id):
if not self.filteredSets.has_key(filter_id):
raise KeyError,\
'no such FilteredSet: %s' % filterId
del self.filteredSets[filterId]
'no such FilteredSet: %s' % filter_id
del self.filteredSets[filter_id]
def clearFilteredSet(self,filterId):
if not self.filteredSets.has_key(filterId):
def clearFilteredSet(self,filter_id):
if not self.filteredSets.has_key(filter_id):
raise KeyError,\
'no such FilteredSet: %s' % filterId
self.filteredSets[filterId].clear()
'no such FilteredSet: %s' % filter_id
self.filteredSets[filter_id].clear()
def manage_addFilteredSet(self, filterId, typeFilteredSet, expr, URL1, \
def manage_addFilteredSet(self, filter_id, typeFilteredSet, expr, URL1, \
REQUEST=None,RESPONSE=None):
""" add a new filtered set """
if len(filterId)==0: raise RuntimeError,'Length of ID too short'
if len(expr)==0: raise RuntimeError,'Length of expression too short'
if len(filter_id) == 0: raise RuntimeError,'Length of ID too short'
if len(expr) == 0: raise RuntimeError,'Length of expression too short'
self.addFilteredSet(filterId, typeFilteredSet, expr)
self.addFilteredSet(filter_id, typeFilteredSet, expr)
if RESPONSE:
RESPONSE.redirect(URL1+'/manage_workspace?'
'manage_tabs_message=FilteredSet%20added')
def manage_delFilteredSet(self, filterIds=[], URL1=None, \
def manage_delFilteredSet(self, filter_ids=[], URL1=None, \
REQUEST=None,RESPONSE=None):
""" delete a list of FilteredSets"""
for filterId in filterIds:
self.delFilteredSet(filterId)
for filter_id in filter_ids:
self.delFilteredSet(filter_id)
if RESPONSE:
RESPONSE.redirect(URL1+'/manage_workspace?'
'manage_tabs_message=FilteredSet(s)%20deleted')
def manage_saveFilteredSet(self,filterId, expr, URL1=None,\
def manage_saveFilteredSet(self,filter_id, expr, URL1=None,\
REQUEST=None,RESPONSE=None):
""" save expression for a FilteredSet """
self.filteredSets[filterId].setExpression(expr)
self.filteredSets[filter_id].setExpression(expr)
if RESPONSE:
RESPONSE.redirect(URL1+'/manage_workspace?'
......@@ -219,22 +171,22 @@ class TopicIndex(Persistent, Implicit, SimpleItem):
def getIndexSourceNames(self):
""" return names of indexed attributes """
return ('n/a',)
def manage_clearFilteredSet(self, filterIds=[], URL1=None, \
def manage_clearFilteredSet(self, filter_ids=[], URL1=None, \
REQUEST=None,RESPONSE=None):
""" clear a list of FilteredSets"""
for filterId in filterIds:
self.clearFilteredSet(filterId)
for filter_id in filter_ids:
self.clearFilteredSet(filter_id)
if RESPONSE:
RESPONSE.redirect(URL1+'/manage_workspace?'
'manage_tabs_message=FilteredSet(s)%20cleared')
editFilteredSet = DTMLFile('dtml/editFilteredSet',globals())
index_html = DTMLFile('dtml/index', globals())
manage_workspace = DTMLFile('dtml/manageTopicIndex',globals())
editFilteredSet = DTMLFile('dtml/editFilteredSet',globals())
manage_addTopicIndexForm = DTMLFile('dtml/addTopicIndex', globals())
......
......@@ -11,23 +11,20 @@
#
##############################################################################
import os ,sys, re, unittest
import ZODB
import os,sys,re,unittest
from Products.PluginIndexes.TopicIndex.TopicIndex import TopicIndex
class Obj:
def __init__(self,id,meta_type=''):
self.id = id
self.id = id
self.meta_type = meta_type
def getId(self): return self.id
def getPhysicalPath(self): return self.id
class TestBase(unittest.TestCase):
def _searchAnd(self,query,expected):
......@@ -36,19 +33,15 @@ class TestBase(unittest.TestCase):
def _searchOr(self,query,expected):
return self._search(query,'or',expected)
def _search(self,query,operator,expected):
res = self.TI._apply_index({'topic':{'query':query,'operator':operator}})
rows = list(res[0])
rows.sort()
expected.sort()
self.assertEqual(rows,expected,query)
return rows
class TestTopicIndex(TestBase):
def setUp(self):
......@@ -66,7 +59,6 @@ class TestTopicIndex(TestBase):
def testOr(self):
self._searchOr('doc1',[1,2])
self._searchOr(['doc1'],[1,2])
self._searchOr('doc2',[3,4]),
......@@ -75,15 +67,12 @@ class TestTopicIndex(TestBase):
def testAnd(self):
self._searchAnd('doc1',[1,2])
self._searchAnd(['doc1'],[1,2])
self._searchAnd('doc2',[3,4])
self._searchAnd(['doc2'],[3,4])
self._searchAnd(['doc1','doc2'],[])
def test_suite():
return unittest.TestSuite( (
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment