Commit 2f2ef92f authored by Andreas Jung

     - PathIndex and TopicIndex are now using a counter for the number
       of indexed objects instead of using a very expensive calculation
       based on the keys of their indexes.
parent 5dfe4e63
...@@ -13,6 +13,10 @@ Zope Changes ...@@ -13,6 +13,10 @@ Zope Changes
Bugs fixed Bugs fixed
- PathIndex and TopicIndex are now using a counter for the number
of indexed objects instead of using a very expensive calculation
based on the keys of their indexes.
- backport of TAL fixes from z3 - backport of TAL fixes from z3
* i18n and metal interactions * i18n and metal interactions
......
...@@ -15,17 +15,16 @@ import os, sys, unittest ...@@ -15,17 +15,16 @@ import os, sys, unittest
from Products.PluginIndexes.PathIndex.PathIndex import PathIndex from Products.PluginIndexes.PathIndex.PathIndex import PathIndex
class Dummy: class Dummy:
meta_type="foo" meta_type="foo"
def __init__( self, path): def __init__( self, path):
self.path = path self.path = path
def getPhysicalPath(self): def getPhysicalPath(self):
return self.path.split('/') return self.path.split('/')
def __str__( self ): def __str__( self ):
return '<Dummy: %s>' % self.path return '<Dummy: %s>' % self.path
...@@ -62,25 +61,35 @@ class TestCase( unittest.TestCase ): ...@@ -62,25 +61,35 @@ class TestCase( unittest.TestCase ):
self._index.index_object( k, v ) self._index.index_object( k, v )
def testEmpty(self): def testEmpty(self):
self.assertEqual(self._index.numObjects() ,0)
assert len( self._index ) == 0 self.assertEqual(self._index.getEntryForObject(1234), None)
assert self._index.getEntryForObject( 1234 ) is None
self._index.unindex_object( 1234 ) # nothrow self._index.unindex_object( 1234 ) # nothrow
assert self._index._apply_index( {"suxpath":"xxx"} ) is None self.assertEqual(self._index._apply_index({"suxpath":"xxx"}), None)
def testUnIndex(self): def testUnIndex(self):
self._populateIndex() self._populateIndex()
self.assertEqual(self._index.numObjects(), 18)
for k in self._values.keys(): for k in self._values.keys():
self._index.unindex_object(k) self._index.unindex_object(k)
assert len(self._index._index)==0 self.assertEqual(self._index.numObjects(), 0)
assert len(self._index._unindex)==0 self.assertEqual(len(self._index._index), 0)
self.assertEqual(len(self._index._unindex), 0)
def testReindex(self):
self._populateIndex()
self.assertEqual(self._index.numObjects(), 18)
o = Dummy('/foo/bar')
self._index.index_object(19, o)
self.assertEqual(self._index.numObjects(), 19)
self._index.index_object(19, o)
self.assertEqual(self._index.numObjects(), 19)
def testUnIndexError(self): def testUnIndexError(self):
self._populateIndex() self._populateIndex()
# this should not raise an error # this should not raise an error
self._index.unindex_object(-1) self._index.unindex_object(-1)
...@@ -91,10 +100,7 @@ class TestCase( unittest.TestCase ): ...@@ -91,10 +100,7 @@ class TestCase( unittest.TestCase ):
def testRoot(self): def testRoot(self):
self._populateIndex() self._populateIndex()
tests = ( ("/",0, range(1,19)), )
tests = [
("/",0, range(1,19)),
]
for comp,level,results in tests: for comp,level,results in tests:
for path in [comp,"/"+comp,"/"+comp+"/"]: for path in [comp,"/"+comp,"/"+comp+"/"]:
...@@ -110,14 +116,10 @@ class TestCase( unittest.TestCase ): ...@@ -110,14 +116,10 @@ class TestCase( unittest.TestCase ):
lst = list(res[0].keys()) lst = list(res[0].keys())
self.assertEqual(lst,results) self.assertEqual(lst,results)
def testRoot(self): def testRoot(self):
self._populateIndex() self._populateIndex()
tests = ( ("/",0, range(1,19)), )
tests = [
("/",0, range(1,19)),
]
for comp,level,results in tests: for comp,level,results in tests:
for path in [comp,"/"+comp,"/"+comp+"/"]: for path in [comp,"/"+comp,"/"+comp+"/"]:
...@@ -137,7 +139,6 @@ class TestCase( unittest.TestCase ): ...@@ -137,7 +139,6 @@ class TestCase( unittest.TestCase ):
def testSimpleTests(self): def testSimpleTests(self):
self._populateIndex() self._populateIndex()
tests = [ tests = [
("aa", 0, [1,2,3,4,5,6,7,8,9]), ("aa", 0, [1,2,3,4,5,6,7,8,9]),
("aa", 1, [1,2,3,10,11,12] ), ("aa", 1, [1,2,3,10,11,12] ),
...@@ -172,7 +173,6 @@ class TestCase( unittest.TestCase ): ...@@ -172,7 +173,6 @@ class TestCase( unittest.TestCase ):
def testComplexOrTests(self): def testComplexOrTests(self):
self._populateIndex() self._populateIndex()
tests = [ tests = [
(['aa','bb'],1,[1,2,3,4,5,6,10,11,12,13,14,15]), (['aa','bb'],1,[1,2,3,4,5,6,10,11,12,13,14,15]),
(['aa','bb','xx'],1,[1,2,3,4,5,6,10,11,12,13,14,15]), (['aa','bb','xx'],1,[1,2,3,4,5,6,10,11,12,13,14,15]),
...@@ -189,7 +189,6 @@ class TestCase( unittest.TestCase ): ...@@ -189,7 +189,6 @@ class TestCase( unittest.TestCase ):
def testComplexANDTests(self): def testComplexANDTests(self):
self._populateIndex() self._populateIndex()
tests = [ tests = [
(['aa','bb'],1,[]), (['aa','bb'],1,[]),
([('aa',0),('bb',1)],0,[4,5,6]), ([('aa',0),('bb',1)],0,[4,5,6]),
...@@ -197,7 +196,6 @@ class TestCase( unittest.TestCase ): ...@@ -197,7 +196,6 @@ class TestCase( unittest.TestCase ):
] ]
for lst ,level,results in tests: for lst ,level,results in tests:
res = self._index._apply_index( res = self._index._apply_index(
{"path":{'query':lst,"level":level,"operator":"and"}}) {"path":{'query':lst,"level":level,"operator":"and"}})
lst = list(res[0].keys()) lst = list(res[0].keys())
......
...@@ -11,32 +11,32 @@ ...@@ -11,32 +11,32 @@
# #
############################################################################## ##############################################################################
__version__ = '$Id: TopicIndex.py,v 1.13 2003/06/23 08:45:58 andreasjung Exp $' __version__ = '$Id: TopicIndex.py,v 1.14 2003/08/16 16:44:48 andreasjung Exp $'
from Products.PluginIndexes import PluggableIndex
from Products.PluginIndexes.common.util import parseIndexRequest
from Globals import Persistent, DTMLFile from Globals import Persistent, DTMLFile
from OFS.SimpleItem import SimpleItem from OFS.SimpleItem import SimpleItem
from Acquisition import Implicit from zLOG import ERROR, LOG
from BTrees.OOBTree import OOBTree from BTrees.OOBTree import OOBTree
from BTrees.IIBTree import IISet,intersection,union from BTrees.IIBTree import IISet,intersection,union
from zLOG import ERROR, LOG
import FilteredSet import FilteredSet
from Products.PluginIndexes import PluggableIndex
from Products.PluginIndexes.common.util import parseIndexRequest
_marker = [] _marker = []
class TopicIndex(Persistent, Implicit, SimpleItem): class TopicIndex(Persistent, SimpleItem):
""" A TopicIndex maintains a set of FilteredSet objects. """ A TopicIndex maintains a set of FilteredSet objects.
Every FilteredSet object consists of an expression and Every FilteredSet object consists of an expression and
and IISet with all Ids of indexed objects that eval with and IISet with all Ids of indexed objects that eval with
this expression to 1. this expression to 1.
""" """
__implements__ = (PluggableIndex.PluggableIndexInterface,) __implements__ = (PluggableIndex.PluggableIndexInterface,)
meta_type="TopicIndex" meta_type="TopicIndex"
query_options = ('query','operator')
manage_options= ( manage_options= (
{'label': 'FilteredSets', {'label': 'FilteredSets',
...@@ -44,93 +44,59 @@ class TopicIndex(Persistent, Implicit, SimpleItem): ...@@ -44,93 +44,59 @@ class TopicIndex(Persistent, Implicit, SimpleItem):
'help': ('TopicIndex','TopicIndex_searchResults.stx')}, 'help': ('TopicIndex','TopicIndex_searchResults.stx')},
) )
manage_workspace = DTMLFile('dtml/manageTopicIndex',globals())
query_options = ('query','operator')
def __init__(self,id,caller=None): def __init__(self,id,caller=None):
self.id = id self.id = id
self.filteredSets = OOBTree() self.filteredSets = OOBTree()
# experimental code for specifing the operator self.operators = ('or','and')
self.operators = ('or','and')
self.defaultOperator = 'or' self.defaultOperator = 'or'
def getId(self): return self.id def getId(self): return self.id
def clear(self): def clear(self):
""" clear everything """
for fs in self.filteredSets.values(): for fs in self.filteredSets.values():
fs.clear() fs.clear()
def index_object(self, docid, obj ,threshold=100):
def index_object(self, documentId, obj ,threshold=100):
""" hook for (Z)Catalog """ """ hook for (Z)Catalog """
for fid, filteredSet in self.filteredSets.items(): for fid, filteredSet in self.filteredSets.items():
filteredSet.index_object(documentId,obj) filteredSet.index_object(docid,obj)
return 1 return 1
def unindex_object(self,docid):
def unindex_object(self,documentId):
""" hook for (Z)Catalog """ """ hook for (Z)Catalog """
for fs in self.filteredSets.values(): for fs in self.filteredSets.values():
try: try:
fs.unindex_object(documentId) fs.unindex_object(docid)
except KeyError: except KeyError:
LOG(self.__class__.__name__, ERROR, LOG(self.__class__.__name__, ERROR,
'Attempt to unindex document' 'Attempt to unindex document'
' with id %s failed' % documentId) ' with id %s failed' % docid)
return 1 return 1
def __len__(self):
""" len """
n=0
for fs in self.filteredSets.values():
n = n + len(fs.getIds())
return n
def numObjects(self): def numObjects(self):
return "N/A" return "n/a"
def keys(self): pass
def values(self): pass
def items(self): pass
def search(self,filterId):
if self.filteredSets.has_key(filterId):
return self.filteredSets[filterId].getIds()
def search(self,filter_id):
if self.filteredSets.has_key(filter_id):
return self.filteredSets[filter_id].getIds()
def _apply_index(self, request, cid=''): def _apply_index(self, request, cid=''):
""" hook for (Z)Catalog """ hook for (Z)Catalog
request mapping type (usually {"topic": "..." } 'request' -- mapping type (usually {"topic": "..." }
cid ??? 'cid' -- ???
""" """
record = parseIndexRequest(request,self.id,self.query_options) record = parseIndexRequest(request,self.id,self.query_options)
if record.keys==None: return None if record.keys is None: return None
# experimental code for specifing the operator operator = record.get('operator', self.defaultOperator).lower()
operator = record.get('operator',self.defaultOperator).lower() if operator == 'or': set_func = union
else: set_func = intersection
# depending on the operator we use intersection of union
if operator=="or": set_func = union
else: set_func = intersection
res = None res = None
for filter_id in record.keys:
for filterId in record.keys: rows = self.search(filter_id)
rows = self.search(filterId)
res = set_func(res,rows) res = set_func(res,rows)
if res: if res:
...@@ -138,79 +104,65 @@ class TopicIndex(Persistent, Implicit, SimpleItem): ...@@ -138,79 +104,65 @@ class TopicIndex(Persistent, Implicit, SimpleItem):
else: else:
return IISet(), (self.id,) return IISet(), (self.id,)
def uniqueValues(self,name=None, withLength=0):
def uniqueValues(self,name=None,withLength=0):
""" needed to be consistent with the interface """ """ needed to be consistent with the interface """
return self.filteredSets.keys() return self.filteredSets.keys()
def getEntryForObject(self,docid, default=_marker):
def getEntryForObject(self,documentId,default=_marker):
""" Takes a document ID and returns all the information we have """ Takes a document ID and returns all the information we have
on that specific object. """ on that specific object.
"""
return self.filteredSets.keys() return self.filteredSets.keys()
def addFilteredSet(self, filter_id, typeFilteredSet, expr):
def addFilteredSet(self, filterId, typeFilteredSet, expr): if self.filteredSets.has_key(filter_id):
if self.filteredSets.has_key(filterId):
raise KeyError,\ raise KeyError,\
'A FilteredSet with this name already exists: %s' % filterId 'A FilteredSet with this name already exists: %s' % filter_id
self.filteredSets[filter_id] = \
self.filteredSets[filterId] = \ FilteredSet.factory(filter_id, typeFilteredSet, expr)
FilteredSet.factory(filterId, typeFilteredSet, expr)
def delFilteredSet(self,filter_id):
def delFilteredSet(self,filterId): if not self.filteredSets.has_key(filter_id):
if not self.filteredSets.has_key(filterId):
raise KeyError,\ raise KeyError,\
'no such FilteredSet: %s' % filterId 'no such FilteredSet: %s' % filter_id
del self.filteredSets[filter_id]
del self.filteredSets[filterId]
def clearFilteredSet(self,filter_id):
def clearFilteredSet(self,filterId): if not self.filteredSets.has_key(filter_id):
if not self.filteredSets.has_key(filterId):
raise KeyError,\ raise KeyError,\
'no such FilteredSet: %s' % filterId 'no such FilteredSet: %s' % filter_id
self.filteredSets[filter_id].clear()
self.filteredSets[filterId].clear()
def manage_addFilteredSet(self, filter_id, typeFilteredSet, expr, URL1, \
def manage_addFilteredSet(self, filterId, typeFilteredSet, expr, URL1, \
REQUEST=None,RESPONSE=None): REQUEST=None,RESPONSE=None):
""" add a new filtered set """ """ add a new filtered set """
if len(filterId)==0: raise RuntimeError,'Length of ID too short' if len(filter_id) == 0: raise RuntimeError,'Length of ID too short'
if len(expr)==0: raise RuntimeError,'Length of expression too short' if len(expr) == 0: raise RuntimeError,'Length of expression too short'
self.addFilteredSet(filterId, typeFilteredSet, expr) self.addFilteredSet(filter_id, typeFilteredSet, expr)
if RESPONSE: if RESPONSE:
RESPONSE.redirect(URL1+'/manage_workspace?' RESPONSE.redirect(URL1+'/manage_workspace?'
'manage_tabs_message=FilteredSet%20added') 'manage_tabs_message=FilteredSet%20added')
def manage_delFilteredSet(self, filter_ids=[], URL1=None, \
def manage_delFilteredSet(self, filterIds=[], URL1=None, \
REQUEST=None,RESPONSE=None): REQUEST=None,RESPONSE=None):
""" delete a list of FilteredSets""" """ delete a list of FilteredSets"""
for filterId in filterIds: for filter_id in filter_ids:
self.delFilteredSet(filterId) self.delFilteredSet(filter_id)
if RESPONSE: if RESPONSE:
RESPONSE.redirect(URL1+'/manage_workspace?' RESPONSE.redirect(URL1+'/manage_workspace?'
'manage_tabs_message=FilteredSet(s)%20deleted') 'manage_tabs_message=FilteredSet(s)%20deleted')
def manage_saveFilteredSet(self,filter_id, expr, URL1=None,\
def manage_saveFilteredSet(self,filterId, expr, URL1=None,\
REQUEST=None,RESPONSE=None): REQUEST=None,RESPONSE=None):
""" save expression for a FilteredSet """ """ save expression for a FilteredSet """
self.filteredSets[filterId].setExpression(expr) self.filteredSets[filter_id].setExpression(expr)
if RESPONSE: if RESPONSE:
RESPONSE.redirect(URL1+'/manage_workspace?' RESPONSE.redirect(URL1+'/manage_workspace?'
...@@ -219,22 +171,22 @@ class TopicIndex(Persistent, Implicit, SimpleItem): ...@@ -219,22 +171,22 @@ class TopicIndex(Persistent, Implicit, SimpleItem):
def getIndexSourceNames(self): def getIndexSourceNames(self):
""" return names of indexed attributes """ """ return names of indexed attributes """
return ('n/a',) return ('n/a',)
def manage_clearFilteredSet(self, filterIds=[], URL1=None, \ def manage_clearFilteredSet(self, filter_ids=[], URL1=None, \
REQUEST=None,RESPONSE=None): REQUEST=None,RESPONSE=None):
""" clear a list of FilteredSets""" """ clear a list of FilteredSets"""
for filterId in filterIds: for filter_id in filter_ids:
self.clearFilteredSet(filterId) self.clearFilteredSet(filter_id)
if RESPONSE: if RESPONSE:
RESPONSE.redirect(URL1+'/manage_workspace?' RESPONSE.redirect(URL1+'/manage_workspace?'
'manage_tabs_message=FilteredSet(s)%20cleared') 'manage_tabs_message=FilteredSet(s)%20cleared')
editFilteredSet = DTMLFile('dtml/editFilteredSet',globals())
index_html = DTMLFile('dtml/index', globals()) index_html = DTMLFile('dtml/index', globals())
manage_workspace = DTMLFile('dtml/manageTopicIndex',globals())
editFilteredSet = DTMLFile('dtml/editFilteredSet',globals())
manage_addTopicIndexForm = DTMLFile('dtml/addTopicIndex', globals()) manage_addTopicIndexForm = DTMLFile('dtml/addTopicIndex', globals())
......
...@@ -11,23 +11,20 @@ ...@@ -11,23 +11,20 @@
# #
############################################################################## ##############################################################################
import os ,sys, re, unittest
import ZODB import ZODB
import os,sys,re,unittest
from Products.PluginIndexes.TopicIndex.TopicIndex import TopicIndex from Products.PluginIndexes.TopicIndex.TopicIndex import TopicIndex
class Obj: class Obj:
def __init__(self,id,meta_type=''): def __init__(self,id,meta_type=''):
self.id = id self.id = id
self.meta_type = meta_type self.meta_type = meta_type
def getId(self): return self.id def getId(self): return self.id
def getPhysicalPath(self): return self.id def getPhysicalPath(self): return self.id
class TestBase(unittest.TestCase): class TestBase(unittest.TestCase):
def _searchAnd(self,query,expected): def _searchAnd(self,query,expected):
...@@ -36,19 +33,15 @@ class TestBase(unittest.TestCase): ...@@ -36,19 +33,15 @@ class TestBase(unittest.TestCase):
def _searchOr(self,query,expected): def _searchOr(self,query,expected):
return self._search(query,'or',expected) return self._search(query,'or',expected)
def _search(self,query,operator,expected): def _search(self,query,operator,expected):
res = self.TI._apply_index({'topic':{'query':query,'operator':operator}}) res = self.TI._apply_index({'topic':{'query':query,'operator':operator}})
rows = list(res[0]) rows = list(res[0])
rows.sort() rows.sort()
expected.sort() expected.sort()
self.assertEqual(rows,expected,query) self.assertEqual(rows,expected,query)
return rows return rows
class TestTopicIndex(TestBase): class TestTopicIndex(TestBase):
def setUp(self): def setUp(self):
...@@ -66,7 +59,6 @@ class TestTopicIndex(TestBase): ...@@ -66,7 +59,6 @@ class TestTopicIndex(TestBase):
def testOr(self): def testOr(self):
self._searchOr('doc1',[1,2]) self._searchOr('doc1',[1,2])
self._searchOr(['doc1'],[1,2]) self._searchOr(['doc1'],[1,2])
self._searchOr('doc2',[3,4]), self._searchOr('doc2',[3,4]),
...@@ -75,15 +67,12 @@ class TestTopicIndex(TestBase): ...@@ -75,15 +67,12 @@ class TestTopicIndex(TestBase):
def testAnd(self): def testAnd(self):
self._searchAnd('doc1',[1,2]) self._searchAnd('doc1',[1,2])
self._searchAnd(['doc1'],[1,2]) self._searchAnd(['doc1'],[1,2])
self._searchAnd('doc2',[3,4]) self._searchAnd('doc2',[3,4])
self._searchAnd(['doc2'],[3,4]) self._searchAnd(['doc2'],[3,4])
self._searchAnd(['doc1','doc2'],[]) self._searchAnd(['doc1','doc2'],[])
def test_suite(): def test_suite():
return unittest.TestSuite( ( return unittest.TestSuite( (
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment