Commit 3095fce7 authored by Andreas Jung's avatar Andreas Jung

removed 'SearchIndex' package

parent 2dc887a3
@@ -24,6 +24,8 @@ Zope Changes
Features added
- The obsolete 'SearchIndex' package has been removed
- Traversal now supports a "post-traversal hook" that gets run
after traversal has finished and the security context is established.
# Nothing to see here (deprecated module).
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
from Lexicon import Lexicon
from Splitter import Splitter
from UnTextIndex import Or
import re, string
from BTrees.IIBTree import IISet, union, IITreeSet
from BTrees.OIBTree import OIBTree
from BTrees.IOBTree import IOBTree
from BTrees.OOBTree import OOBTree
from randid import randid
class GlobbingLexicon(Lexicon):
"""Lexicon which supports basic globbing function ('*' and '?').
This lexicon keeps several data structures around that are useful
for searching. They are:
'_lexicon' -- Contains the mapping from word => word_id
'_inverseLex' -- Contains the mapping from word_id => word
'_digrams' -- Contains a mapping from digram => word_id
Before going further, it is necessary to understand what a digram is,
as it is a core component of the structure of this lexicon. A digram
is a two-letter sequence in a word. For example, the word 'zope'
would be converted into the digrams::
['$z', 'zo', 'op', 'pe', 'e$']
where '$' is a word marker used at the beginning and end of
each word, so that digrams at word boundaries are distinct from
digrams inside a word.
"""
multi_wc = '*'
single_wc = '?'
eow = '$'
def __init__(self):
self.clear()
def clear(self):
self._lexicon = OIBTree()
self._inverseLex = IOBTree()
self._digrams = OOBTree()
def _convertBTrees(self, threshold=200):
Lexicon._convertBTrees(self, threshold)
if type(self._digrams) is OOBTree: return
from BTrees.convert import convert
_digrams=self._digrams
self._digrams=OOBTree()
self._digrams._p_jar=self._p_jar
convert(_digrams, self._digrams, threshold, IITreeSet)
def createDigrams(self, word):
"""Returns a list with the set of digrams in the word."""
digrams = list(word)
digrams.append(self.eow)
last = self.eow
for i in range(len(digrams)):
last, digrams[i] = digrams[i], last + digrams[i]
return digrams
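# Illustrative example (not part of the original source):
# GlobbingLexicon().createDigrams('zope') returns
# ['$z', 'zo', 'op', 'pe', 'e$'] -- each pass through the loop
# shifts 'last' forward one character while prefixing the previous
# character onto the current one, yielding the '$'-delimited
# two-letter windows described in the class docstring.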
def getWordId(self, word):
"""Provided 'word', return the matching integer word id."""
if self._lexicon.has_key(word):
return self._lexicon[word]
else:
return self.assignWordId(word)
set = getWordId # Kludge for old code
def getWord(self, wid):
return self._inverseLex.get(wid, None)
def assignWordId(self, word):
"""Assigns a new word id to the provided word, and return it."""
# Double check it's not in the lexicon already, and if it is, just
# return it.
if self._lexicon.has_key(word):
return self._lexicon[word]
# Get word id. BBB Backward compat pain.
inverse=self._inverseLex
try: insert=inverse.insert
except AttributeError:
# we have an "old" BTree object
if inverse:
wid=inverse.keys()[-1]+1
else:
self._inverseLex=IOBTree()
wid=1
inverse[wid] = word
else:
# we have a "new" IOBTree object
wid=randid()
while not inverse.insert(wid, word):
wid=randid()
self._lexicon[word] = wid
# Now take all the digrams and insert them into the digram map.
for digram in self.createDigrams(word):
set = self._digrams.get(digram, None)
if set is None:
self._digrams[digram] = set = IISet()
set.insert(wid)
return wid
def get(self, pattern):
""" Query the lexicon for words matching a pattern."""
wc_set = [self.multi_wc, self.single_wc]
digrams = []
globbing = 0
for i in range(len(pattern)):
if pattern[i] in wc_set:
globbing = 1
continue
if i == 0:
digrams.insert(i, (self.eow + pattern[i]))
try:
if pattern[i+1] not in wc_set:
digrams.append(pattern[i] + pattern[i+1])
except IndexError:
# single-character pattern: close the word instead
digrams.append(pattern[i] + self.eow)
else:
try:
if pattern[i+1] not in wc_set:
digrams.append( pattern[i] + pattern[i+1] )
except IndexError:
digrams.append( (pattern[i] + self.eow) )
if not globbing:
result = self._lexicon.get(pattern, None)
if result is None:
return ()
return (result, )
## now get all of the intsets that contain the result digrams
result = None
for digram in digrams:
result=union(result, self._digrams.get(digram, None))
if not result:
return ()
else:
## We now have a set of candidate words: every word that
## shares at least one digram with 'pattern' (the digram
## sets were unioned above). Many candidates do not really
## match the pattern, since a word can contain matching
## digrams in the wrong order or position, so filter the
## candidates through a real regular expression match.
expr = re.compile(self.createRegex(pattern))
hits = IISet()
for x in result:
if expr.match(self._inverseLex[x]):
hits.insert(x)
return hits
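# Illustrative walk-through (not part of the original source): for
# the pattern 'zo*' the loop above collects the digrams
# ['$z', 'zo'], the union of their wid sets yields every word
# containing either digram, and the compiled regex 'zo.*$' then
# discards false positives such as 'gonzo', which contains the
# digram 'zo' but does not start with it.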
def __getitem__(self, word):
""" """
return self.get(word)
def query_hook(self, q):
"""expand wildcards"""
ListType = type([])
i = len(q) - 1
while i >= 0:
e = q[i]
if isinstance(e, ListType):
self.query_hook(e)
elif ( (self.multi_wc in e) or
(self.single_wc in e) ):
wids = self.get(e)
words = []
for wid in wids:
if words:
words.append(Or)
words.append(wid)
if not words:
# if words is empty, return something that will make
# textindex's __getitem__ return an empty result list
words.append('')
q[i] = words
i = i - 1
return q
def Splitter(self, astring, words=None):
""" wrap the splitter """
## Pass no stop-word/synonym table: slightly less efficient,
## but there is little sense in stemming or stopping a
## globbing lexicon.
return Splitter(astring)
def createRegex(self, pat):
"""Translate a PATTERN to a regular expression.
There is no way to quote meta-characters.
"""
# Remove characters that are meaningful in a regex
transTable = string.maketrans("", "")
result = string.translate(pat, transTable,
r'()&|!@#$%^{}\<>.')
# First, deal with multi-character globbing
result = string.replace(result, '*', '.*')
# Next, we need to deal with single-character globbing
result = string.replace(result, '?', '.')
return "%s$" % result
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""Simple column indices"""
__version__='$Revision: 1.31 $'[11:-2]
from Persistence import Persistent
from BTrees.OOBTree import OOBTree
from BTrees.IIBTree import IITreeSet, IISet, weightedUnion
import operator
from Missing import MV
import string
ListType=type([])
StringType=type('s')
def nonEmpty(s):
"returns true if a non-empty string or any other (nonstring) type"
if type(s) is StringType:
if s: return 1
else: return 0
else:
return 1
class Index(Persistent):
"""Index object interface"""
isDeprecatedIndex = 1
def __init__(self, data=None, schema=None, id=None,
ignore_ex=None, call_methods=None):
"""Create an index
The arguments are:
'data' -- a mapping from integer object ids to objects or
records,
'schema' -- a mapping from item name to index into data
records. If 'data' is a mapping to objects, then schema
should be 'None'.
'id' -- the name of the item attribute to index. This is
either an attribute name or a record key.
"""
######################################################################
# For b/w compatibility, have to allow __init__ calls with zero args
if not data==schema==id==ignore_ex==call_methods==None:
self._data = data
self._schema = schema
self.id = id
self.ignore_ex=ignore_ex
self.call_methods=call_methods
self._index = OOBTree()
self._reindex()
else:
pass
# for b/w compatibility
_init = __init__
def dpHasUniqueValuesFor(self, name):
' has unique values for column NAME '
if name == self.id:
return 1
else:
return 0
def dpUniqueValues(self, name=None, withLengths=0):
"""\
returns the unique values for name
if withLengths is true, returns a sequence of
tuples of (value, length)
"""
if name is None:
name = self.id
elif name != self.id:
return []
if not withLengths: return tuple(
filter(nonEmpty,self._index.keys())
)
else:
rl=[]
for i in self._index.keys():
if not nonEmpty(i): continue
else: rl.append((i, len(self._index[i])))
return tuple(rl)
def clear(self):
self._index = OOBTree()
def _reindex(self, start=0):
"""Recompute index data for data with ids >= start."""
index=self._index
get=index.get
if not start: index.clear()
id = self.id
if self._schema is None:
f=getattr
else:
f = operator.__getitem__
id = self._schema[id]
for i,row in self._data.items(start):
k=f(row,id)
if k is None or k == MV: continue
set=get(k)
if set is None: index[k] = set = IITreeSet()
set.insert(i)
def index_item(self, i, obj=None):
"""Recompute index data for data with ids >= start."""
index = self._index
id = self.id
if (self._schema is None) or (obj is not None):
f = getattr
else:
f = operator.__getitem__
id = self._schema[id]
if obj is None:
obj = self._data[i]
try: k=f(obj, id)
except: return
if self.call_methods:
k=k()
if k is None or k == MV:
return
set = index.get(k)
if set is None: index[k] = set = IITreeSet()
set.insert(i)
def unindex_item(self, i, obj=None):
"""Recompute index data for data with ids >= start."""
index = self._index
id = self.id
if self._schema is None:
f = getattr
else:
f = operator.__getitem__
id = self._schema[id]
if obj is None:
obj = self._data[i]
try: k=f(obj, id)
except: return
if self.call_methods:
k=k()
if k is None or k == MV:
return
set = index.get(k)
if set is not None: set.remove(i)
def _apply_index(self, request, cid=''):
"""Apply the index to query parameters given in the argument,
request
The argument should be a mapping object.
If the request does not contain the needed parameters, then
None is returned.
If the request contains a parameter with the name of the
column + '_usage', it is sniffed for information on how to
handle applying the index.
Otherwise two objects are returned. The first object is a
ResultSet containing the record numbers of the matching
records. The second object is a tuple containing the names of
all data fields used.
"""
id = self.id #name of the column
cidid = "%s/%s" % (cid,id)
has_key = request.has_key
if has_key(cidid): keys = request[cidid]
elif has_key(id): keys = request[id]
else: return None
if type(keys) is not ListType: keys=[keys]
index = self._index
r = None
anyTrue = 0
opr = None
if request.has_key(id+'_usage'):
# see if any usage params are sent to field
opr=string.split(string.lower(request[id+"_usage"]),':')
opr, opr_args=opr[0], opr[1:]
if opr=="range":
if 'min' in opr_args: lo = min(keys)
else: lo = None
if 'max' in opr_args: hi = max(keys)
else: hi = None
anyTrue=1
try:
if hi: setlist = index.items(lo,hi)
else: setlist = index.items(lo)
for k,set in setlist:
w, r = weightedUnion(r, set)
except KeyError: pass
else: #not a range
get = index.get
for key in keys:
if key: anyTrue = 1
set=get(key)
if set is not None:
w, r = weightedUnion(r, set)
if r is None:
if anyTrue: r=IISet()
else: return None
return r, (id,)
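# Illustrative queries (not part of the original source), assuming
# an Index built on an 'author' column:
#   index._apply_index({'author': 'bob'})
#       -> (set of matching record ids, ('author',))
#   index._apply_index({'author': ['bob', 'sally']})
#       -> union of both rows
#   index._apply_index({'author': ['a', 'm'],
#                       'author_usage': 'range:min:max'})
#       -> every record whose value sorts between 'a' and 'm'
#   index._apply_index({'title': 'x'})
#       -> None (the request does not mention this column)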
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
__doc__=""" Module breaks out Zope specific methods and behavior. In
addition, provides the Lexicon class which defines a word to integer
mapping.
"""
from Splitter import Splitter
from Persistence import Persistent
from Acquisition import Implicit
from BTrees.OIBTree import OIBTree
from BTrees.IOBTree import IOBTree
from BTrees.IIBTree import IISet, IITreeSet
from randid import randid
class Lexicon(Persistent, Implicit):
"""Maps words to word ids and then some
The Lexicon object is an attempt to abstract vocabularies out of
Text indexes. This abstraction is not totally cooked yet; this
module still includes the parser for the 'Text Index Query
Language' and a few other hacks.
"""
# default for older objects
stop_syn={}
def __init__(self, stop_syn=None):
self.clear()
if stop_syn is None:
self.stop_syn = {}
else:
self.stop_syn = stop_syn
def clear(self):
self._lexicon = OIBTree()
self._inverseLex = IOBTree()
def _convertBTrees(self, threshold=200):
if (type(self._lexicon) is OIBTree and
type(getattr(self, '_inverseLex', None)) is IOBTree):
return
from BTrees.convert import convert
lexicon=self._lexicon
self._lexicon=OIBTree()
self._lexicon._p_jar=self._p_jar
convert(lexicon, self._lexicon, threshold)
try:
inverseLex=self._inverseLex
self._inverseLex=IOBTree()
except AttributeError:
# older lexicons didn't have an inverse lexicon
self._inverseLex=IOBTree()
inverseLex=self._inverseLex
self._inverseLex._p_jar=self._p_jar
convert(inverseLex, self._inverseLex, threshold)
def set_stop_syn(self, stop_syn):
""" pass in a mapping of stopwords and synonyms. Format is:
{'word' : [syn1, syn2, ..., synx]}
Vocabularies do not necessarily need to implement this if their
splitters do not support stemming or stopping.
"""
self.stop_syn = stop_syn
def getWordId(self, word):
""" return the word id of 'word' """
wid=self._lexicon.get(word, None)
if wid is None:
wid=self.assignWordId(word)
return wid
set = getWordId
def getWord(self, wid):
""" post-2.3.1b2 method, will not work with unconverted lexicons """
return self._inverseLex.get(wid, None)
def assignWordId(self, word):
"""Assigns a new word id to the provided word and returns it."""
# First make sure it's not already in there
if self._lexicon.has_key(word):
return self._lexicon[word]
try: inverse=self._inverseLex
except AttributeError:
# whoops, old lexicon without wids
inverse=self._inverseLex=IOBTree()
# (use a fresh name so the 'word' argument is not clobbered)
for w, wid in self._lexicon.items():
inverse[wid]=w
wid=randid()
while not inverse.insert(wid, word):
wid=randid()
self._lexicon[intern(word)] = wid
return wid
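# Illustrative note (not part of the original source): randid()
# draws a random integer id, and inverse.insert(wid, word) returns
# 0 when that id is already taken, so the while loop above simply
# re-rolls until it finds a free slot. Random ids also spread
# inserts across the IOBTree instead of always appending at the
# high end.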
def get(self, key, default=None):
"""Return the matched word against the key."""
r=IISet()
wid=self._lexicon.get(key, default)
if wid is not None: r.insert(wid)
return r
def __getitem__(self, key):
return self.get(key)
def __len__(self):
return len(self._lexicon)
def Splitter(self, astring, words=None):
""" wrap the splitter """
if words is None:
words = self.stop_syn
return Splitter(astring, words)
def query_hook(self, q):
""" we don't want to modify the query cuz we're dumb """
return q
stop_words=(
'am', 'ii', 'iii', 'per', 'po', 're', 'a', 'about', 'above', 'across',
'after', 'afterwards', 'again', 'against', 'all', 'almost', 'alone',
'along', 'already', 'also', 'although', 'always', 'am', 'among',
'amongst', 'amoungst', 'amount', 'an', 'and', 'another', 'any',
'anyhow', 'anyone', 'anything', 'anyway', 'anywhere', 'are', 'around',
'as', 'at', 'back', 'be', 'became', 'because', 'become', 'becomes',
'becoming', 'been', 'before', 'beforehand', 'behind', 'being',
'below', 'beside', 'besides', 'between', 'beyond', 'bill', 'both',
'bottom', 'but', 'by', 'can', 'cannot', 'cant', 'con', 'could',
'couldnt', 'cry', 'describe', 'detail', 'do', 'done', 'down', 'due',
'during', 'each', 'eg', 'eight', 'either', 'eleven', 'else',
'elsewhere', 'empty', 'enough', 'even', 'ever', 'every', 'everyone',
'everything', 'everywhere', 'except', 'few', 'fifteen', 'fifty',
'fill', 'find', 'fire', 'first', 'five', 'for', 'former', 'formerly',
'forty', 'found', 'four', 'from', 'front', 'full', 'further', 'get',
'give', 'go', 'had', 'has', 'hasnt', 'have', 'he', 'hence', 'her',
'here', 'hereafter', 'hereby', 'herein', 'hereupon', 'hers',
'herself', 'him', 'himself', 'his', 'how', 'however', 'hundred', 'i',
'ie', 'if', 'in', 'inc', 'indeed', 'interest', 'into', 'is', 'it',
'its', 'itself', 'keep', 'last', 'latter', 'latterly', 'least',
'less', 'made', 'many', 'may', 'me', 'meanwhile', 'might', 'mill',
'mine', 'more', 'moreover', 'most', 'mostly', 'move', 'much', 'must',
'my', 'myself', 'name', 'namely', 'neither', 'never', 'nevertheless',
'next', 'nine', 'no', 'nobody', 'none', 'noone', 'nor', 'not',
'nothing', 'now', 'nowhere', 'of', 'off', 'often', 'on', 'once',
'one', 'only', 'onto', 'or', 'other', 'others', 'otherwise', 'our',
'ours', 'ourselves', 'out', 'over', 'own', 'per', 'perhaps',
'please', 'pre', 'put', 'rather', 're', 'same', 'see', 'seem',
'seemed', 'seeming', 'seems', 'serious', 'several', 'she', 'should',
'show', 'side', 'since', 'sincere', 'six', 'sixty', 'so', 'some',
'somehow', 'someone', 'something', 'sometime', 'sometimes',
'somewhere', 'still', 'such', 'take', 'ten', 'than', 'that', 'the',
'their', 'them', 'themselves', 'then', 'thence', 'there',
'thereafter', 'thereby', 'therefore', 'therein', 'thereupon', 'these',
'they', 'thick', 'thin', 'third', 'this', 'those', 'though', 'three',
'through', 'throughout', 'thru', 'thus', 'to', 'together', 'too',
'toward', 'towards', 'twelve', 'twenty', 'two', 'un', 'under',
'until', 'up', 'upon', 'us', 'very', 'via', 'was', 'we', 'well',
'were', 'what', 'whatever', 'when', 'whence', 'whenever', 'where',
'whereafter', 'whereas', 'whereby', 'wherein', 'whereupon',
'wherever', 'whether', 'which', 'while', 'whither', 'who', 'whoever',
'whole', 'whom', 'whose', 'why', 'will', 'with', 'within', 'without',
'would', 'yet', 'you', 'your', 'yours', 'yourself', 'yourselves',
)
stop_word_dict={}
for word in stop_words: stop_word_dict[word]=None
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""Pluggable Index Base Class """
__version__='$Revision: 1.4 $'[11:-2]
import Interface
class PluggableIndex:
"""Base pluggable index class"""
def getEntryForObject(self, documentId, default=None):
"""Get all information contained for a specific object by documentId"""
pass
def index_object(self, documentId, obj, threshold=None):
"""Index an object:
'documentId' is the integer ID of the document
'obj' is the object to be indexed
'threshold' is the number of words to process between committing
subtransactions. If None, subtransactions are disabled"""
pass
def unindex_object(self, documentId):
"""Remove the documentId from the index"""
pass
def uniqueValues(self, name=None, withLengths=0):
"""Returns the unique values for name.
If 'withLengths' is true, returns a sequence of tuples of
(value, length)"""
pass
def _apply_index(self, request, cid=''):
"""Apply the index to query parameters given in the argument, request.
The argument should be a mapping object.
If the request does not contain the needed parameters, then None is
returned.
If the request contains a parameter with the name of the column
+ "_usage", it is sniffed for information on how to handle applying
the index.
Otherwise two objects are returned. The first object is a ResultSet
containing the record numbers of the matching records. The second
object is a tuple containing the names of all data fields used."""
pass
PluggableIndexInterface = Interface.impliedInterface(PluggableIndex)
PluggableIndex.__implements__ = PluggableIndexInterface
The SearchIndex package has been deprecated since Zope 2.4.
Use the refactored modules in Products/PluginIndexes instead.
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
from BTrees.IIBTree import IIBucket
from BTrees.IIBTree import weightedIntersection, weightedUnion, difference
from BTrees.OOBTree import OOSet, union
class ResultList:
def __init__(self, d, words, index, TupleType=type(())):
self._index = index
if type(words) is not OOSet: words=OOSet(words)
self._words = words
if (type(d) is TupleType):
d = IIBucket((d,))
elif type(d) is not IIBucket:
d = IIBucket(d)
self._dict=d
self.__getitem__=d.__getitem__
try: self.__nonzero__=d.__nonzero__
except: pass
self.get=d.get
def __nonzero__(self):
return not not self._dict
def bucket(self): return self._dict
def keys(self): return self._dict.keys()
def has_key(self, key): return self._dict.has_key(key)
def items(self): return self._dict.items()
def __and__(self, x):
return self.__class__(
weightedIntersection(self._dict, x._dict)[1],
union(self._words, x._words),
self._index,
)
def and_not(self, x):
return self.__class__(
difference(self._dict, x._dict),
self._words,
self._index,
)
def __or__(self, x):
return self.__class__(
weightedUnion(self._dict, x._dict)[1],
union(self._words, x._words),
self._index,
)
def near(self, x):
result = IIBucket()
dict = self._dict
xdict = x._dict
xhas = xdict.has_key
positions = self._index.positions
for id, score in dict.items():
if not xhas(id): continue
p=(map(lambda i: (i,0), positions(id,self._words))+
map(lambda i: (i,1), positions(id,x._words)))
p.sort()
d = lp = 9999
li = None
lsrc = None
for i,src in p:
if i != li and src != lsrc and li is not None:
d = min(d,i-li)
li = i
lsrc = src
if d==lp: score = min(score,xdict[id]) # synonyms
else: score = (score+xdict[id])/d
result[id] = score
return self.__class__(
result, union(self._words, x._words), self._index)
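# Illustrative sketch of the scoring above (not part of the
# original source): for a document where self's words occur at
# position 3 and x's words at position 5, p sorts to
# [(3, 0), (5, 1)], d becomes min(9999, 5 - 3) = 2, and the merged
# score is (score + xdict[id]) / 2 -- the closer the two word sets
# appear in the document, the higher the combined relevance.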
*shared*
Splitter Splitter.c
/*****************************************************************************
Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
This software is subject to the provisions of the Zope Public License,
Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
FOR A PARTICULAR PURPOSE
****************************************************************************/
#include "Python.h"
#include <ctype.h>
#define ASSIGN(V,E) {PyObject *__e; __e=(E); Py_XDECREF(V); (V)=__e;}
#define UNLESS(E) if(!(E))
#define UNLESS_ASSIGN(V,E) ASSIGN(V,E) UNLESS(V)
typedef struct
{
PyObject_HEAD
PyObject *text, *synstop;
char *here, *end;
int index;
} Splitter;
static PyObject *next_word(Splitter *, char **, char **);
static void
Splitter_reset(Splitter *self)
{
self->here = PyString_AsString(self->text);
self->index = -1;
}
static void
Splitter_dealloc(Splitter *self)
{
Py_XDECREF(self->text);
Py_XDECREF(self->synstop);
PyMem_DEL(self);
}
static int
Splitter_length(Splitter *self)
{
PyObject *res=0;
Splitter_reset(self);
while(1)
{
UNLESS_ASSIGN(res,next_word(self,NULL,NULL)) return -1;
UNLESS(PyString_Check(res))
{
Py_DECREF(res);
break;
}
}
return self->index+1;
}
static PyObject *
Splitter_concat(Splitter *self, PyObject *other)
{
PyErr_SetString(PyExc_TypeError, "Cannot concatenate Splitters.");
return NULL;
}
static PyObject *
Splitter_repeat(Splitter *self, long n)
{
PyErr_SetString(PyExc_TypeError, "Cannot repeat Splitters.");
return NULL;
}
/*
Map an input word to an output word by applying standard
filtering/mapping words, including synonyms/stop words.
Input is a word.
Output is:
None -- The word is a stop word
sometext -- A replacement for the word
*/
static PyObject *
check_synstop(Splitter *self, PyObject *word)
{
PyObject *value;
char *cword;
int len;
cword = PyString_AsString(word);
len = PyString_Size(word);
if(len < 2) /* Single-letter words are stop words! */
{
Py_INCREF(Py_None);
return Py_None;
}
/*************************************************************
Test whether a word has any letters. *
*/
for (; --len >= 0 && ! isalpha((unsigned char)cword[len]); );
if (len < 0)
{
Py_INCREF(Py_None);
return Py_None;
}
/*
* If no letters, treat it as a stop word.
*************************************************************/
Py_INCREF(word);
if (self->synstop == NULL) return word;
while ((value = PyObject_GetItem(self->synstop, word)) &&
PyString_Check(value))
{
ASSIGN(word,value);
if(len++ > 100) break; /* Avoid infinite synonym loops */
}
if (value == NULL)
{
PyErr_Clear();
return word;
}
return value; /* Which must be None! */
}
#define MAX_WORD 64 /* Words longer than MAX_WORD are stemmed */
static PyObject *
next_word(Splitter *self, char **startpos, char **endpos)
{
char wbuf[MAX_WORD];
char *end, *here, *b;
int i = 0, c;
PyObject *pyword, *res;
here=self->here;
end=self->end;
b=wbuf;
while (here < end)
{
/* skip hyphens */
if ((i > 0) && (*here == '-'))
{
here++;
while ((here < end) && isspace((unsigned char) *here)) here++;
continue;
}
c=tolower((unsigned char) *here);
/* Check to see if this character is part of a word */
if(isalnum((unsigned char)c) || c=='/' || c=='_')
{ /* Found a word character */
if(startpos && i==0) *startpos=here;
if(i++ < MAX_WORD) *b++ = c;
}
else if (i != 0)
{ /* We've found the end of a word */
if(i >= MAX_WORD) i=MAX_WORD; /* "stem" the long word */
UNLESS(pyword = PyString_FromStringAndSize(wbuf, i))
{
self->here=here;
return NULL;
}
UNLESS(res = check_synstop(self, pyword))
{
self->here=here;
Py_DECREF(pyword);
return NULL;
}
if (res != Py_None)
{
if(endpos) *endpos=here;
self->here=here;
Py_DECREF(pyword);
self->index++;
return res;
}
/* The word is a stopword, so ignore it */
Py_DECREF(res);
Py_DECREF(pyword);
i = 0;
b=wbuf;
}
here++;
}
self->here=here;
/* We've reached the end of the string */
if(i >= MAX_WORD) i=MAX_WORD; /* "stem" the long word */
if (i == 0)
{
/* No words */
self->here=here;
Py_INCREF(Py_None);
return Py_None;
}
UNLESS(pyword = PyString_FromStringAndSize(wbuf, i)) return NULL;
if(endpos) *endpos=here;
res = check_synstop(self, pyword);
Py_DECREF(pyword);
if(PyString_Check(res)) self->index++;
return res;
}
static PyObject *
Splitter_item(Splitter *self, int i)
{
PyObject *word = NULL;
if (i <= self->index) Splitter_reset(self);
while(self->index < i)
{
Py_XDECREF(word);
UNLESS(word = next_word(self,NULL,NULL)) return NULL;
if (word == Py_None)
{
Py_DECREF(word);
PyErr_SetString(PyExc_IndexError,
"Splitter index out of range");
return NULL;
}
}
return word;
}
static PyObject *
Splitter_slice(Splitter *self, int i, int j)
{
PyErr_SetString(PyExc_TypeError, "Cannot slice Splitters.");
return NULL;
}
static PySequenceMethods Splitter_as_sequence = {
(inquiry)Splitter_length, /*sq_length*/
(binaryfunc)Splitter_concat, /*sq_concat*/
(intargfunc)Splitter_repeat, /*sq_repeat*/
(intargfunc)Splitter_item, /*sq_item*/
(intintargfunc)Splitter_slice, /*sq_slice*/
(intobjargproc)0, /*sq_ass_item*/
(intintobjargproc)0, /*sq_ass_slice*/
};
static PyObject *
Splitter_pos(Splitter *self, PyObject *args)
{
char *start, *end, *ctext;
PyObject *res;
int i;
UNLESS(PyArg_Parse(args, "i", &i)) return NULL;
if (i <= self->index) Splitter_reset(self);
while(self->index < i)
{
UNLESS(res=next_word(self, &start, &end)) return NULL;
if(PyString_Check(res))
{
/* next_word() has already advanced self->index */
Py_DECREF(res);
continue;
}
Py_DECREF(res);
PyErr_SetString(PyExc_IndexError, "Splitter index out of range");
return NULL;
}
ctext=PyString_AsString(self->text);
return Py_BuildValue("(ii)", start - ctext, end - ctext);
}
static PyObject *
Splitter_indexes(Splitter *self, PyObject *args)
{
PyObject *word, *r, *w=0, *index=0;
int i=0;
UNLESS(PyArg_ParseTuple(args,"O",&word)) return NULL;
UNLESS(r=PyList_New(0)) return NULL;
UNLESS(word=check_synstop(self, word)) goto err;
Splitter_reset(self);
while(1)
{
UNLESS_ASSIGN(w,next_word(self, NULL, NULL)) goto err;
UNLESS(PyString_Check(w)) break;
if(PyObject_Compare(word,w)==0)
{
UNLESS_ASSIGN(index,PyInt_FromLong(i)) goto err;
if(PyList_Append(r,index) < 0) goto err;
}
i++;
}
Py_XDECREF(w);
Py_XDECREF(index);
return r;
err:
Py_DECREF(r);
Py_XDECREF(index);
return NULL;
}
static struct PyMethodDef Splitter_methods[] = {
{ "pos", (PyCFunction)Splitter_pos, 0,
"pos(index) -- Return the starting and ending position of a token" },
{ "indexes", (PyCFunction)Splitter_indexes, METH_VARARGS,
"indexes(word) -- Return al list of the indexes of word in the sequence",
},
{ NULL, NULL } /* sentinel */
};
static PyObject *
Splitter_getattr(Splitter *self, char *name)
{
return Py_FindMethod(Splitter_methods, (PyObject *)self, name);
}
static char SplitterType__doc__[] = "";
static PyTypeObject SplitterType = {
PyObject_HEAD_INIT(NULL)
0, /*ob_size*/
"Splitter", /*tp_name*/
sizeof(Splitter), /*tp_basicsize*/
0, /*tp_itemsize*/
/* methods */
(destructor)Splitter_dealloc, /*tp_dealloc*/
(printfunc)0, /*tp_print*/
(getattrfunc)Splitter_getattr, /*tp_getattr*/
(setattrfunc)0, /*tp_setattr*/
(cmpfunc)0, /*tp_compare*/
(reprfunc)0, /*tp_repr*/
0, /*tp_as_number*/
&Splitter_as_sequence, /*tp_as_sequence*/
0, /*tp_as_mapping*/
(hashfunc)0, /*tp_hash*/
(ternaryfunc)0, /*tp_call*/
(reprfunc)0, /*tp_str*/
/* Space for future expansion */
0L,0L,0L,0L,
SplitterType__doc__ /* Documentation string */
};
static PyObject *
get_Splitter(PyObject *modinfo, PyObject *args)
{
Splitter *self;
PyObject *doc, *synstop = NULL;
UNLESS(PyArg_ParseTuple(args,"O|O",&doc,&synstop)) return NULL;
UNLESS(self = PyObject_NEW(Splitter, &SplitterType)) return NULL;
if(synstop)
{
self->synstop=synstop;
Py_INCREF(synstop);
}
else self->synstop=NULL;
UNLESS(self->text = PyObject_Str(doc)) goto err;
UNLESS(self->here=PyString_AsString(self->text)) goto err;
self->end = self->here + PyString_Size(self->text);
self->index = -1;
return (PyObject*)self;
err:
Py_DECREF(self);
return NULL;
}
static struct PyMethodDef Splitter_module_methods[] = {
{ "Splitter", (PyCFunction)get_Splitter, METH_VARARGS,
"Splitter(doc[,synstop]) -- Return a word splitter" },
{ NULL, NULL }
};
static char Splitter_module_documentation[] =
"Parse source strings into sequences of words\n"
"\n"
"for use in an inverted index\n"
"\n"
"$Id: Splitter.c,v 1.19 2002/03/21 15:48:55 htrd Exp $\n"
;
void
initSplitter(void)
{
PyObject *m, *d;
/* Create the module and add the functions */
m = Py_InitModule4("Splitter", Splitter_module_methods,
Splitter_module_documentation,
(PyObject*)NULL,PYTHON_API_VERSION);
/* Add some symbolic constants to the module */
d = PyModule_GetDict(m);
if (PyErr_Occurred()) Py_FatalError("can't initialize module Splitter");
}
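/* Illustrative usage from Python (not part of the original source):

     from Splitter import Splitter
     s = Splitter('The quick brown-fox', {'the': None})
     list(s)             # ['quick', 'brownfox'] -- 'the' is stopped,
                         # the hyphen joins its neighbours
     s.indexes('quick')  # [0]
     s.pos(1)            # (10, 19), the offsets of 'brown-fox'

   Mapping a word to None in the synstop dictionary marks it as a
   stop word; mapping it to a string substitutes a synonym before
   indexing. */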
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""Text Index
Notes on a new text index design
The current inverted index algorithm works well enough for our needs.
Speed of the algorithm does not seem to be a problem; however, data
management *is* a significant problem. In particular:
- Process size grows unacceptably *during mass indexing*.
- Data load and store seems to take too long. For example,
clearing an inverted index and committing takes a significant
amount of time.
- The current trie data structure contributes significantly to the
number of objects in the system.
- Removal/update of documents is especially problematic. We have
to either:
- Unindex old version of an object before updating it. This is
a real hassle for apps like sws.
- Tool through entire index looking for object references. This
is *totally* impractical.
Some observations of competition:
- Xerox system can index "5-million word document in 256k". What
does this mean?
- Does the system save word positions as we do?
- What is the index indexing?
- What was the vocabulary of the system?
Let\'s see. Assume a 10,000 word vocabulary. Then we use
25-bytes per entry. Hm.....
- Verity has some sense of indexing in phases and packing index.
Verity keeps the index in multiple chunks and a search may
operate on multiple chunks. This means that we can add data
without updating large records.
This may be especially handy for mass updates, like we do in
cv3. In a sense we do this in cv3 and sws. We index a large
batch of documents to a temporary index and then merge changes
in.
If "temporary" index was integral to system, then maybe merger
could be done as a background task....
Tree issues
Tree structures benefit small updates, because an update to an
entry does not cause update of entire tree, however, each node in
tree introduces overhead.
Trie structure currently introduces an excessive number of nodes.
Typically, a node per two or three words. Trie has potential to
reduce storage because key storage is shared between words.
Maybe an alternative to a Trie is some sort of nested BTree. Or
maybe a Trie with some kind of binary-search-based indexing.
Suppose that:
- database objects were at leaves of tree
- vocabulary was finite
- we don\'t remove a leaf when it becomes empty
Then:
- After some point, tree objects no longer change
If this is case, then it doesn\'t make sense to optimize tree for
change.
Additional notes
Stemming reduces the number of words substantially.
Proposal -- new TextIndex
TextIndex -- word -> textSearchResult
Implemented with:
InvertedIndex -- word -> idSet
ResultIndex -- id -> docData
where:
word -- is a token, typically a word, but could be a name or a
number
textSearchResult -- id -> (score, positions)
id -- integer, say 4-byte.
positions -- sequence of integers.
score -- numeric measure of relevance, f(numberOfWords, positions)
numberOfWords -- number of words in source document.
idSet -- set of ids
docData -- numberOfWords, word->positions
Note that ids and positions are ints. We will build C
extensions for efficiently storing and pickling structures
with lots of ints. This should significantly reduce space
overhead and storage/retrieval times.
"""
__version__='$Revision: 1.32 $'[11:-2]
#XXX I strongly suspect that this is broken, but I'm not going to fix it. :(
from Globals import Persistent
from BTrees.OOBTree import OOBTree
from BTrees.IIBTree import IISet, IIBucket, intersection
import operator
from Splitter import Splitter
from string import strip
import string, re
from Lexicon import Lexicon, stop_word_dict
from ResultList import ResultList
class TextIndex(Persistent):
isDeprecatedIndex = 1
def __init__(self, data=None, schema=None, id=None,
ignore_ex=None, call_methods=None):
"""Create an index
The arguments are:
'data' -- a mapping from integer object ids to objects or
records,
'schema' -- a mapping from item name to index into data
records. If 'data' is a mapping to objects, then schema
should be 'None'.
'id' -- the name of the item attribute to index. This is
either an attribute name or a record key.
'ignore_ex' -- Tells the indexer to ignore exceptions that
are raised when indexing an object.
'call_methods' -- Tells the indexer to call methods instead
of getattr or getitem to get an attribute.
"""
######################################################################
# For b/w compatibility, have to allow __init__ calls with zero args
if not data==schema==id==ignore_ex==call_methods==None:
self._data=data
self._schema=schema
self.id=id
self.ignore_ex=ignore_ex
self.call_methods=call_methods
self._index=OOBTree() #XXX Is this really an IOBTree?
self._syn=stop_word_dict
self._reindex()
else:
pass
# for backward compatibility
_init = __init__
def clear(self):
self._index = OOBTree()
def positions(self, docid, words):
"""Return the positions in the document for the given document
id of the word, word."""
id = self.id
if self._schema is None:
f = getattr
else:
f = operator.__getitem__
id = self._schema[id]
row = self._data[docid]
if self.call_methods:
doc = str(f(row, id)())
else:
doc = str(f(row, id))
r = []
for word in words:
r = r+Splitter(doc, self._syn).indexes(word)
return r
def index_item(self, i, obj=None, un=0):
"""Recompute index data for data with ids >= start.
if 'obj' is passed in, it is indexed instead of _data[i]"""
id = self.id
if (self._schema is None) or (obj is not None):
f = getattr
else:
f = operator.__getitem__
id = self._schema[id]
if obj is None:
obj = self._data[i]
try:
if self.call_methods:
k = str(f(obj, id)())
else:
k = str(f(obj, id))
self._index_document(k, i, un)
except:
pass
def unindex_item(self, i, obj=None):
return self.index_item(i, obj, 1)
def _reindex(self, start=0):
"""Recompute index data for data with ids >= start."""
for i in self._data.keys(start): self.index_item(i)
def _index_document(self, document_text, id, un=0,
tupleType=type(()),
dictType=type({}),
):
src = Splitter(document_text, self._syn)
d = {}
old = d.has_key
last = None
for s in src:
if s[0] == '\"': last=self.subindex(s[1:-1], d, old, last)
else:
if old(s):
if s != last: d[s] = d[s]+1
else: d[s] = 1
index = self._index
get = index.get
if un:
for word,score in d.items():
r = get(word)
if r is not None:
if type(r) is tupleType: del index[word]
else:
if r.has_key(id): del r[id]
if type(r) is dictType:
if len(r) < 2:
if r:
for k, v in r.items(): index[word] = k,v
else: del index[word]
else: index[word] = r
else:
for word,score in d.items():
r = get(word)
if r is not None:
r = index[word]
if type(r) is tupleType:
r = {r[0]:r[1]}
r[id] = score
index[word] = r
elif type(r) is dictType:
if len(r) > 4:
b = IIBucket()
for k, v in r.items(): b[k] = v
r = b
r[id] = score
index[word] = r
else: r[id] = score
else: index[word] = id, score
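# Illustrative life cycle (not part of the original source):
# indexing "morning coffee, strong coffee" as document 7 counts
# d = {'morning': 1, 'coffee': 2, 'strong': 1}; a word's posting
# starts as a compact (docid, score) tuple, becomes a
# {docid: score} dict when a second document uses the word, and
# is promoted to an IIBucket once it holds more than four entries.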
def _subindex(self, isrc, d, old, last):
src = Splitter(isrc, self._syn)
for s in src:
if s[0] == '\"': last=self.subindex(s[1:-1],d,old,last)
else:
if old(s):
if s != last: d[s] = d[s]+1
else: d[s] = 1
return last
def __getitem__(self, word):
"""Return an InvertedIndex-style result "list"
"""
src = tuple(Splitter(word, self._syn))
if not src: return ResultList({}, (word,), self)
if len(src) == 1:
src=src[0]
if src[:1]=='"' and src[-1:]=='"': return self[src]
r = self._index.get(word,None)
if r is None: r = {}
return ResultList(r, (word,), self)
r = None
for word in src:
rr = self[word]
if r is None: r = rr
else: r = r.near(rr)
return r
def _apply_index(self, request, cid='', ListType=[]):
""" Apply the index to query parameters given in the argument,
request
The argument should be a mapping object.
If the request does not contain the needed parameters, then
None is returned.
Otherwise two objects are returned. The first object is a
ResultSet containing the record numbers of the matching
records. The second object is a tuple containing the names of
all data fields used.
"""
id = self.id
cidid = "%s/%s" % (cid, id)
has_key = request.has_key
if has_key(cidid): keys = request[cidid]
elif has_key(id): keys = request[id]
else: return None
if type(keys) is type(''):
if not keys or not strip(keys): return None
keys = [keys]
r = None
for key in keys:
key = strip(key)
if not key: continue
rr = IISet()
try:
for i,score in query(key,self).items():
if score: rr.insert(i)
except KeyError: pass
if r is None: r = rr
else:
# Note that we *and*/*narrow* multiple search terms.
# (IISet has no intersection method; use the module-level
# function imported above)
r = intersection(r, rr)
if r is not None: return r, (id,)
return IISet(), (id,)
AndNot = 'andnot'
And = 'and'
Or = 'or'
Near = '...'
QueryError='TextIndex.QueryError'
def query(s, index, default_operator = Or,
ws = (string.whitespace,)):
# First replace any occurences of " and not " with " andnot "
s = re.sub('[%s]+and[%s]+not[%s]+' % (ws * 3), ' andnot ', s)
q = parse(s)
q = parse2(q, default_operator)
return evaluate(q, index)
def parse(s):
'''Parse parentheses and quotes'''
l = []
tmp = string.lower(s)
while (1):
p = parens(tmp)
if (p is None):
# No parentheses found. Look for quotes then exit.
l = l + quotes(tmp)
break
else:
# Look for quotes in the section of the string before
# the parentheses, then parse the string inside the parens
l = l + quotes(tmp[:(p[0] - 1)])
l.append(parse(tmp[p[0] : p[1]]))
# continue looking through the rest of the string
tmp = tmp[(p[1] + 1):]
return l
def parse2(q, default_operator,
operator_dict = {AndNot: AndNot, And: And, Or: Or, Near: Near},
ListType=type([]),
):
'''Find operators and operands'''
i = 0
isop=operator_dict.has_key
while (i < len(q)):
if (type(q[i]) is ListType): q[i] = parse2(q[i], default_operator)
# every other item, starting with the first, should be an operand
if ((i % 2) != 0):
# This word should be an operator; if it is not, splice in
# the default operator.
if type(q[i]) is not ListType and isop(q[i]):
q[i] = operator_dict[q[i]]
else: q[i : i] = [ default_operator ]
i = i + 1
return q
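# Illustrative pipeline (not part of the original source) for the
# query string 'bob and not ("your uncle" or cat)':
#   after the re.sub in query(): 'bob andnot ("your uncle" or cat)'
#   parse()  -> ['bob', 'andnot', [['your', '...', 'uncle'], 'or', 'cat']]
#   parse2() recognizes 'andnot', 'or' and '...' as operators and
#   splices in the default operator wherever two operands touch,
#   e.g. parse2(['bob', 'cat'], Or) -> ['bob', 'or', 'cat'].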
def parens(s, parens_re = re.compile(r'[()]').search):
index=open_index=paren_count = 0
while 1:
# re's search returns a match object (or None), not an index
mo = parens_re(s, index)
if mo is None : break
index = mo.start()
if s[index] == '(':
paren_count = paren_count + 1
if open_index == 0 : open_index = index + 1
else:
paren_count = paren_count - 1
if paren_count == 0:
return open_index, index
# always advance past the character just handled
index = index + 1
if paren_count == 0: # No parentheses found
return None
else:
raise QueryError, "Mismatched parentheses"
def quotes(s, ws = (string.whitespace,)):
# split up quoted regions
splitted = re.split( '[%s]*\"[%s]*' % (ws * 2),s)
split=string.split
if (len(splitted) > 1):
if ((len(splitted) % 2) == 0): raise QueryError, "Mismatched quotes"
for i in range(1,len(splitted),2):
# split the quoted region into words
splitted[i] = filter(None, split(splitted[i]))
# put the Proximity operator in between quoted words
for j in range(1, len(splitted[i])):
splitted[i][j : j] = [ Near ]
for i in range(len(splitted)-1,-1,-2):
# split the non-quoted region into words
splitted[i:i+1] = filter(None, split(splitted[i]))
splitted = filter(None, splitted)
else:
# No quotes, so just split the string into words
splitted = filter(None, split(s))
return splitted
def get_operands(q, i, index, ListType=type([]), StringType=type('')):
'''Evaluate and return the left and right operands for an operator'''
try:
left = q[i - 1]
right = q[i + 1]
except IndexError: raise QueryError, "Malformed query"
t=type(left)
if t is ListType: left = evaluate(left, index)
elif t is StringType: left=index[left]
t=type(right)
if t is ListType: right = evaluate(right, index)
elif t is StringType: right=index[right]
return (left, right)
def evaluate(q, index, ListType=type([])):
'''Evaluate a parsed query'''
if (len(q) == 1):
if (type(q[0]) is ListType):
return evaluate(q[0], index)
return index[q[0]]
i = 0
while (i < len(q)):
if q[i] is AndNot:
left, right = get_operands(q, i, index)
val = left.and_not(right)
q[(i - 1) : (i + 2)] = [ val ]
else: i = i + 1
i = 0
while (i < len(q)):
if q[i] is And:
left, right = get_operands(q, i, index)
val = left & right
q[(i - 1) : (i + 2)] = [ val ]
else: i = i + 1
i = 0
while (i < len(q)):
if q[i] is Or:
left, right = get_operands(q, i, index)
val = left | right
q[(i - 1) : (i + 2)] = [ val ]
else: i = i + 1
i = 0
while (i < len(q)):
if q[i] is Near:
left, right = get_operands(q, i, index)
val = left.near(right)
q[(i - 1) : (i + 2)] = [ val ]
else: i = i + 1
if (len(q) != 1): raise QueryError, "Malformed query"
return q[0]
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""Simple column indices"""
__version__='$Revision: 1.35 $'[11:-2]
from Globals import Persistent
from Acquisition import Implicit
import string
from zLOG import LOG, ERROR
from types import StringType, ListType, IntType, TupleType
from BTrees.OOBTree import OOBTree, OOSet
from BTrees.IOBTree import IOBTree
from BTrees.IIBTree import IITreeSet, IISet, union
import BTrees.Length
import sys
_marker = []
class UnIndex(Persistent, Implicit):
"""UnIndex object interface"""
meta_type = 'Field Index'
isDeprecatedIndex = 1
def __init__(self, id, ignore_ex=None, call_methods=None):
"""Create an unindex
UnIndexes are indexes that contain two index components, the
forward index (like plain index objects) and an inverted
index. The inverted index is so that objects can be unindexed
even when the old value of the object is not known.
e.g.
self._index = {datum:[documentId1, documentId2]}
self._unindex = {documentId:datum}
If any item in self._index has a length-one value, the value is an
integer, and not a set. There are special cases in the code to deal
with this.
The arguments are:
'id' -- the name of the item attribute to index. This is
either an attribute name or a record key.
'ignore_ex' -- should be set to true if you want the index
to ignore exceptions raised while indexing instead of
propagating them.
'call_methods' -- should be set to true if you want the index
to call the attribute 'id' (note: 'id' should be callable!)
You will also need to pass an object to the index and
unindex methods for this to work.
"""
self.id = id
self.ignore_ex=ignore_ex # currently unimplemented
self.call_methods=call_methods
# Note that it was unfortunate to use __len__ as the attribute
# name here. New-style classes cache slot methods in C slot
# pointers. The result is that instances can't override slots.
# This is not easy to change on account of old objects with
# __len__ attr.
self.__len__=BTrees.Length.Length()
self.clear()
def __len__(self):
try:
return self.__dict__['__len__']()
except KeyError:
# Fallback for really old indexes
return len(self._unindex)
def clear(self):
# inplace opportunistic conversion from old-style to new style BTrees
try: self.__len__.set(0)
except AttributeError: self.__len__=BTrees.Length.Length()
self._index = OOBTree()
self._unindex = IOBTree()
def _convertBTrees(self, threshold=200):
if type(self._index) is OOBTree: return
from BTrees.convert import convert
_index=self._index
self._index=OOBTree()
def convertSet(s,
IITreeSet=IITreeSet, IntType=type(0),
type=type, len=len,
doneTypes = (IntType, IITreeSet)):
if type(s) in doneTypes: return s
if len(s) == 1:
try: return s[0] # convert to int
except: pass # This is just an optimization.
return IITreeSet(s)
convert(_index, self._index, threshold, convertSet)
_unindex=self._unindex
self._unindex=IOBTree()
convert(_unindex, self._unindex, threshold)
self.__len__=BTrees.Length.Length(len(_index))
def __nonzero__(self):
return not not self._unindex
def histogram(self):
"""Return a mapping which provides a histogram of the number of
elements found at each point in the index."""
histogram = {}
# items() yields (key, value) pairs; test the value, since a
# length-one row is stored as a bare int rather than a set.
for key, value in self._index.items():
if type(value) is IntType:
entry = 1 # "set" length is 1
else:
entry = len(value)
histogram[entry] = histogram.get(entry, 0) + 1
return histogram
def referencedObjects(self):
"""Generate a list of IDs for which we have referenced objects."""
return self._unindex.keys()
def getEntryForObject(self, documentId, default=_marker):
"""Takes a document ID and returns all the information we have
on that specific object."""
if default is _marker:
return self._unindex.get(documentId)
else:
return self._unindex.get(documentId, default)
def removeForwardIndexEntry(self, entry, documentId):
"""Take the entry provided and remove any reference to documentId
in its entry in the index."""
global _marker
indexRow = self._index.get(entry, _marker)
if indexRow is not _marker:
try:
indexRow.remove(documentId)
if not indexRow:
del self._index[entry]
try: self.__len__.change(-1)
except AttributeError: pass # pre-BTrees-module instance
except AttributeError:
# index row is an int
del self._index[entry]
try: self.__len__.change(-1)
except AttributeError: pass # pre-BTrees-module instance
except:
LOG(self.__class__.__name__, ERROR,
('unindex_object could not remove '
'documentId %s from index %s. This '
'should not happen.'
% (str(documentId), str(self.id))), '',
sys.exc_info())
else:
LOG(self.__class__.__name__, ERROR,
('unindex_object tried to retrieve set %s '
'from index %s but couldn\'t. This '
'should not happen.' % (repr(entry), str(self.id))))
def insertForwardIndexEntry(self, entry, documentId):
"""Take the entry provided and put it in the correct place
in the forward index.
This will also deal with creating the entire row if necessary."""
global _marker
indexRow = self._index.get(entry, _marker)
# Make sure there's actually a row there already. If not, create
# an IntSet and stuff it in first.
if indexRow is _marker:
self._index[entry] = documentId
try: self.__len__.change(1)
except AttributeError: pass # pre-BTrees-module instance
else:
try: indexRow.insert(documentId)
except AttributeError:
# index row is an int
indexRow=IITreeSet((indexRow, documentId))
self._index[entry] = indexRow
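# Illustrative life cycle of one forward-index row (not part of
# the original source): the first document stores a bare int,
# self._index['blue'] = 12; a second insert hits the
# AttributeError branch (ints have no insert method) and upgrades
# the row to IITreeSet([12, 17]); removeForwardIndexEntry undoes
# both shapes through its matching except clause.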
def index_object(self, documentId, obj, threshold=None):
""" index and object 'obj' with integer id 'documentId'"""
global _marker
returnStatus = 0
# First we need to see if there's anything interesting to look at
# self.id is the name of the index, which is also the name of the
# attribute we're interested in. If the attribute is callable,
# we'll do so.
try:
datum = getattr(obj, self.id)
if callable(datum):
datum = datum()
except AttributeError:
datum = _marker
# We don't want to do anything that we don't have to here, so we'll
# check to see if the new and existing information is the same.
oldDatum = self._unindex.get(documentId, _marker)
if datum != oldDatum:
if oldDatum is not _marker:
self.removeForwardIndexEntry(oldDatum, documentId)
if datum is not _marker:
self.insertForwardIndexEntry(datum, documentId)
self._unindex[documentId] = datum
returnStatus = 1
return returnStatus
def unindex_object(self, documentId):
""" Unindex the object with integer id 'documentId' and don't
raise an exception if we fail """
global _marker
unindexRecord = self._unindex.get(documentId, _marker)
if unindexRecord is _marker:
return None
self.removeForwardIndexEntry(unindexRecord, documentId)
try:
del self._unindex[documentId]
except:
LOG('UnIndex', ERROR, 'Attempt to unindex nonexistent document'
' with id %s' % documentId)
def _apply_index(self, request, cid='', type=type):
"""Apply the index to query parameters given in the request arg.
The request argument should be a mapping object.
If the request does not have a key which matches the "id" of
the index instance, then None is returned.
If the request *does* have a key which matches the "id" of
the index instance, one of a few things can happen:
- if the value is a blank string, None is returned (in
order to support requests from web forms where
you can't tell a blank string from a missing value).
- if the value is a nonblank string, turn the value into
a single-element sequence, and proceed.
- if the value is a sequence, return a union search.
If the request contains a parameter with the name of the
column + '_usage', it is sniffed for information on how to
handle applying the index.
If None is not returned as a result of the abovementioned
constraints, two objects are returned. The first object is a
ResultSet containing the record numbers of the matching
records. The second object is a tuple containing the names of
all data fields used.
FAQ answer: to search a Field Index for documents that
have a blank string as their value, wrap the request value
up in a tuple ala: request = {'id':('',)}
"""
id = self.id #name of the column
cidid = "%s/%s" % (cid,id)
# i have no f'ing clue what this cidid stuff is for - chrism
if request.has_key(cidid):
keys = request[cidid]
elif request.has_key(id):
keys = request[id]
else:
return None
if type(keys) not in (ListType, TupleType):
if keys == '':
return None
else:
keys = [keys]
index = self._index
r = None
opr = None
if request.has_key(id+'_usage'):
# see if any usage params are sent to field
opr=string.split(string.lower(request[id+"_usage"]),':')
opr, opr_args=opr[0], opr[1:]
if opr=="range": # range search
if 'min' in opr_args: lo = min(keys)
else: lo = None
if 'max' in opr_args: hi = max(keys)
else: hi = None
if hi:
setlist = index.items(lo,hi)
else:
setlist = index.items(lo)
for k, set in setlist:
if type(set) is IntType:
set = IISet((set,))
r = union(r, set)
else: # not a range search
for key in keys:
set=index.get(key, None)
if set is not None:
if type(set) is IntType:
set = IISet((set,))
r = union(r, set)
if type(r) is IntType: r=IISet((r,))
if r is None:
return IISet(), (id,)
else:
return r, (id,)
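# Illustrative queries (not part of the original source) against a
# Field Index on 'color':
#   idx._apply_index({'color': 'blue'})          -> ids valued 'blue'
#   idx._apply_index({'color': ['blue', 'red']}) -> union of both rows
#   idx._apply_index({'color': ''})              -> None (blank form field)
#   idx._apply_index({'color': ('',)})           -> ids indexed under ''
#   idx._apply_index({'color': ['a', 'g'],
#                     'color_usage': 'range:min:max'})
#                                -> ids whose value sorts in 'a'..'g'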
def hasUniqueValuesFor(self, name):
' has unique values for column NAME '
if name == self.id:
return 1
else:
return 0
def uniqueValues(self, name=None, withLengths=0):
"""\
returns the unique values for name
if withLengths is true, returns a sequence of
tuples of (value, length)
"""
if name is None:
name = self.id
elif name != self.id:
return []
if not withLengths:
return tuple(self._index.keys())
else:
rl=[]
for i in self._index.keys():
set = self._index[i]
if type(set) is IntType:
l = 1
else:
l = len(set)
rl.append((i, l))
return tuple(rl)
def keyForDocument(self, id):
return self._unindex[id]
def items(self):
items = []
for k,v in self._index.items():
if type(v) is IntType:
v = IISet((v,))
items.append((k, v))
return items
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
from UnIndex import UnIndex
from zLOG import LOG, ERROR
from types import StringType
from BTrees.OOBTree import OOSet, difference
class UnKeywordIndex(UnIndex):
meta_type = 'Keyword Index'
"""Like an UnIndex only it indexes sequences of items
Searches match any keyword.
This should have an _apply_index that returns a relevance score
"""
def index_object(self, documentId, obj, threshold=None):
""" index an object 'obj' with integer id 'i'
Ideally, we've been passed a sequence of some sort that we
can iterate over. If however, we haven't, we should do something
useful with the results. In the case of a string, this means
indexing the entire string as a keyword."""
# First we need to see if there's anything interesting to look at
# self.id is the name of the index, which is also the name of the
# attribute we're interested in. If the attribute is callable,
# we'll do so.
newKeywords = getattr(obj, self.id, ())
if callable(newKeywords):
newKeywords = newKeywords()
if type(newKeywords) is StringType:
newKeywords = (newKeywords, )
oldKeywords = self._unindex.get(documentId, None)
if oldKeywords is None:
# we've got a new document, let's not futz around.
try:
for kw in newKeywords:
self.insertForwardIndexEntry(kw, documentId)
self._unindex[documentId] = list(newKeywords)
except TypeError:
return 0
else:
# we have an existing entry for this document, and we need
# to figure out if any of the keywords have actually changed
if type(oldKeywords) is not OOSet: oldKeywords=OOSet(oldKeywords)
newKeywords=OOSet(newKeywords)
fdiff = difference(oldKeywords, newKeywords)
rdiff = difference(newKeywords, oldKeywords)
if fdiff or rdiff:
# if we've got forward or reverse changes
self._unindex[documentId] = list(newKeywords)
if fdiff:
self.unindex_objectKeywords(documentId, fdiff)
if rdiff:
for kw in rdiff:
self.insertForwardIndexEntry(kw, documentId)
return 1
def unindex_objectKeywords(self, documentId, keywords):
""" carefully unindex the object with integer id 'documentId'"""
if keywords is not None:
for kw in keywords:
self.removeForwardIndexEntry(kw, documentId)
def unindex_object(self, documentId):
""" carefully unindex the object with integer id 'documentId'"""
keywords = self._unindex.get(documentId, None)
self.unindex_objectKeywords(documentId, keywords)
try:
del self._unindex[documentId]
except KeyError:
LOG('UnKeywordIndex', ERROR, 'Attempt to unindex nonexistent'
' document id %s' % documentId)
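# A minimal sketch of the forward/reverse diff logic in index_object above,
# reusing this module's OOSet/difference imports; the keyword values are
# illustrative.
if __name__ == '__main__':
    old = OOSet(['bob', 'uncle'])
    new = OOSet(['uncle', 'zope'])
    fdiff = difference(old, new)  # OOSet(['bob'])  -- keywords to unindex
    rdiff = difference(new, old)  # OOSet(['zope']) -- keywords to index
    print len(fdiff), len(rdiff)  # 1 1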
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""Text Index
The UnTextIndex falls under the 'I didn't have a better name for it'
excuse. It is an 'Un' Text index because it stores a little bit of
undo information so that objects can be unindexed when the old value
is no longer known.
"""
__version__ = '$Revision: 1.54 $'[11:-2]
import string, re
import operator
from Globals import Persistent
from Acquisition import Implicit
from Splitter import Splitter
from zLOG import LOG, ERROR
from Lexicon import Lexicon
from ResultList import ResultList
from types import *
from BTrees.IOBTree import IOBTree
from BTrees.OIBTree import OIBTree
from BTrees.IIBTree import IIBTree, IIBucket, IISet, IITreeSet
from BTrees.IIBTree import difference, weightedIntersection
AndNot = 'andnot'
And = 'and'
Or = 'or'
Near = '...'
QueryError = 'TextIndex.QueryError'
class UnTextIndex(Persistent, Implicit):
"""Full-text index.
There is a ZCatalog UML model that sheds some light on what is
going on here. '_index' is a BTree which maps word ids to a
mapping from document id to score. Something like (with words
shown in place of the integer word ids):
{'bob' : {1 : 5, 2 : 3, 42 : 9}}
{'uncle' : {1 : 1}}
The '_unindex' attribute is a mapping from document id to word
ids. This mapping allows the catalog to unindex an object:
{42 : ('bob', 'is', 'your', 'uncle')}
This isn't exactly how things are represented in memory; many
optimizations happen along the way."""
isDeprecatedIndex = 1
meta_type = 'Text Index'
def __init__(self, id, ignore_ex=None, call_methods=None, lexicon=None):
"""Create an index
The arguments are:
'id' -- the name of the item attribute to index. This is
either an attribute name or a record key.
'ignore_ex' -- Tells the indexer to ignore exceptions that
are raised when indexing an object.
'call_methods' -- Tells the indexer to call methods instead
of getattr or getitem to get an attribute.
'lexicon' -- The lexicon object to use; if None, the
index will use a private lexicon."""
self.id = id
self.ignore_ex = ignore_ex
self.call_methods = call_methods
self.clear()
if lexicon is None:
## if no lexicon is provided, create a default one
self._lexicon = Lexicon()
else:
# We need to hold a reference to the lexicon, since we can't
# really change lexicons.
self._lexicon = self.getLexicon(lexicon)
def getLexicon(self, vocab_id):
"""Return the Lexicon in use.
Bit of a hack, indexes have been made acquirers so that they
can acquire a vocabulary object from the object system in
Zope. I don't think indexes were ever intended to participate
in this way, but I don't see too much of a problem with it."""
if type(vocab_id) is not StringType:
vocab = vocab_id # we already have the lexicon
return vocab
else:
vocab = getattr(self, vocab_id)
return vocab.lexicon
def __nonzero__(self):
return not not self._unindex
# Too expensive
#def __len__(self):
# """Return the number of objects indexed."""
# return len(self._unindex)
def clear(self):
"""Reinitialize the text index."""
self._index = IOBTree()
self._unindex = IOBTree()
def _convertBTrees(self, threshold=200):
if type(self._lexicon) is type(''):
# Turn the name reference into a hard reference.
self._lexicon=self.getLexicon(self._lexicon)
if type(self._index) is IOBTree: return
from BTrees.convert import convert
_index=self._index
self._index=IOBTree()
def convertScores(scores,
type=type, TupleType=TupleType, IIBTree=IIBTree
):
if type(scores) is not TupleType and type(scores) is not IIBTree:
scores=IIBTree(scores)
return scores
convert(_index, self._index, threshold, convertScores)
_unindex=self._unindex
self._unindex=IOBTree()
convert(_unindex, self._unindex, threshold)
def histogram(self, type=type, TupleType=type(())):
"""Return a mapping which provides a histogram of the number of
elements found at each point in the index."""
histogram = IIBucket()
for (key, value) in self._index.items():
if type(value) is TupleType: entry=1
else: entry = len(value)
histogram[entry] = histogram.get(entry, 0) + 1
return histogram
def getEntryForObject(self, rid, default=None):
"""Get all information contained for a specific object.
This takes the object's record ID as its main argument."""
results = self._unindex.get(rid, None)
if results is None:
return default
else:
return tuple(map(self.getLexicon(self._lexicon).getWord,
results))
def insertForwardIndexEntry(self, entry, documentId, score=1):
"""Uses the information provided to update the indexes.
The basic logic for choice of data structure is based on
the number of entries as follows:
1 tuple
2-3 dictionary
4+ bucket.
"""
index=self._index
indexRow = index.get(entry, None)
if indexRow is not None:
if type(indexRow) is TupleType:
# Tuples are only used for rows which have only
# a single entry. Since we now need more, we'll
# promote it to a mapping object (dictionary).
# First, make sure we're not already in it, if so
# update the score if necessary.
if indexRow[0] == documentId:
if indexRow[1] != score:
indexRow = (documentId, score)
index[entry] = indexRow
else:
indexRow={
indexRow[0]: indexRow[1],
documentId: score,
}
index[entry] = indexRow
else:
if indexRow.get(documentId, -1) != score:
# score changed (or new entry)
if type(indexRow) is DictType:
indexRow[documentId] = score
if len(indexRow) > 3:
# Big enough to give it its own database record
indexRow=IIBTree(indexRow)
index[entry] = indexRow
else:
indexRow[documentId] = score
else:
# We don't have any information at this point, so we'll
# put our first entry in, and use a tuple to save space
index[entry] = (documentId, score)
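# Illustrative promotion sequence for a single entry (the values are
# hypothetical; the thresholds follow the code above):
#   1st posting:  index[wid] == (42, 1)            tuple
#   2nd posting:  index[wid] == {42: 1, 7: 2}      dictionary
#   4th posting:  index[wid] becomes an IIBTree    own database record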
def index_object(self, documentId, obj, threshold=None):
""" Index an object:
'documentId' is the integer id of the document
'obj' is the objects to be indexed
'threshold' is the number of words to process between
commiting subtransactions. If 'None' subtransactions are
disabled. """
# sniff the object for our 'id', the 'document source' of the
# index is this attribute. If it smells callable, call it.
try:
source = getattr(obj, self.id)
if callable(source):
source = str(source())
else:
source = str(source)
except (AttributeError, TypeError):
return 0
lexicon = self.getLexicon(self._lexicon)
splitter=lexicon.Splitter
wordScores = OIBTree()
last = None
# Run through the words and score them
for word in splitter(source):
if word[0] == '\"':
last = self._subindex(word[1:-1], wordScores, last, splitter)
else:
if word==last: continue
last=word
wordScores[word]=wordScores.get(word,0)+1
# Convert scores to use wids:
widScores=IIBucket()
getWid=lexicon.getWordId
for word, score in wordScores.items():
widScores[getWid(word)]=score
del wordScores
currentWids=IISet(self._unindex.get(documentId, []))
# Get rid of document words that are no longer indexed
self.unindex_objectWids(documentId, difference(currentWids, widScores))
# Now index the words. Note that the new xIBTrees are clever
# enough to do nothing when there isn't a change. Woo hoo.
insert=self.insertForwardIndexEntry
for wid, score in widScores.items():
insert(wid, documentId, score)
# Save the unindexing info if it's changed:
wids=widScores.keys()
if wids != currentWids.keys():
self._unindex[documentId]=wids
return len(wids)
def _subindex(self, source, wordScores, last, splitter):
"""Recursively handle multi-word synonyms"""
for word in splitter(source):
if word[0] == '\"':
last = self._subindex(word[1:-1], wordScores, last, splitter)
else:
if word==last: continue
last=word
wordScores[word]=wordScores.get(word,0)+1
return last
def unindex_object(self, i):
""" carefully unindex document with integer id 'i' from the text
index and do not fail if it does not exist """
index = self._index
unindex = self._unindex
wids = unindex.get(i, None)
if wids is not None:
self.unindex_objectWids(i, wids)
del unindex[i]
def unindex_objectWids(self, i, wids):
""" carefully unindex document with integer id 'i' from the text
index and do not fail if it does not exist """
index = self._index
get=index.get
for wid in wids:
widScores = get(wid, None)
if widScores is None:
LOG('UnTextIndex', ERROR,
'unindex_object tried to unindex nonexistent'
' document %s, wid %s' % (i, wid))
continue
if type(widScores) is TupleType:
del index[wid]
else:
try:
del widScores[i]
if widScores:
if type(widScores) is DictType:
if len(widScores) == 1:
# convert to tuple
widScores = widScores.items()[0]
index[wid]=widScores
else:
del index[wid]
except (KeyError, IndexError, TypeError):
LOG('UnTextIndex', ERROR,
'unindex_object tried to unindex nonexistent'
' document %s' % str(i))
def __getitem__(self, word):
"""Return an InvertedIndex-style result "list"
Note that this differentiates between being passed an Integer
and a String. Strings are looked up in the lexicon, whereas
Integers are assumed to be resolved word ids. """
if isinstance(word, IntType):
# We have a word ID
result = self._index.get(word, {})
return ResultList(result, (word,), self)
else:
splitSource = tuple(self.getLexicon(self._lexicon).Splitter(word))
if not splitSource:
return ResultList({}, (word,), self)
if len(splitSource) == 1:
splitSource = splitSource[0]
if splitSource[:1] == splitSource[-1:] == '"':
return self[splitSource]
wids=self.getLexicon(self._lexicon).get(splitSource)
if wids:
r = self._index.get(wids[0], None)
if r is None:
r = {}
else:
r={}
return ResultList(r, (splitSource,), self)
r = None
for word in splitSource:
rr = self[word]
if r is None:
r = rr
else:
r = r.near(rr)
return r
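# Illustrative lookups (the word and wid values are hypothetical):
#   index[42]           -> ResultList for the resolved word id 42
#   index['blue']       -> ResultList for the word 'blue'
#   index['blue stain'] -> self['blue'].near(self['stain'])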
def _apply_index(self, request, cid=''):
""" Apply the index to query parameters given in the argument,
request
The argument should be a mapping object.
If the request does not contain the needed parameters, then
None is returned.
Otherwise two objects are returned. The first object is a
ResultSet containing the record numbers of the matching
records. The second object is a tuple containing the names of
all data fields used.
"""
if request.has_key(self.id):
keys = request[self.id]
else:
return None
operators = {
'andnot':AndNot,
'and':And,
'near':Near,
'or':Or
}
query_operator = Or
# We default to 'or' if we aren't passed an operator in the request
# or if we can't make sense of the passed-in operator
if request.has_key('textindex_operator'):
op=string.lower(str(request['textindex_operator']))
query_operator = operators.get(op, query_operator)
if type(keys) is StringType:
if not keys or not string.strip(keys):
return None
keys = [keys]
r = None
for key in keys:
key = string.strip(key)
if not key:
continue
b = self.query(key, query_operator).bucket()
w, r = weightedIntersection(r, b)
if r is not None:
return r, (self.id,)
return (IIBucket(), (self.id,))
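# A hypothetical request sketch (the index id 'text' is illustrative):
#   {'text': 'bob uncle'}                               -> bob or uncle
#   {'text': 'bob uncle', 'textindex_operator': 'and'}  -> bob and uncle
# The operator is only the default between terms not already joined
# explicitly in the query string.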
def positions(self, docid, words,
# This was never tested: obj
):
"""Return the positions in the document for the given document
id of the word, word."""
return [1]
#################################################################
# The code below here is broken and requires an API change to fix
# it. Waaaaa.
if self._schema is None:
f = getattr
else:
f = operator.__getitem__
id = self._schema[self.id]
if self.call_methods:
doc = str(f(obj, self.id)())
else:
doc = str(f(obj, self.id))
r = []
for word in words:
r = r+self.getLexicon(self._lexicon).Splitter(doc).indexes(word)
return r
def query(self, s, default_operator=Or):
""" Evaluate a query string.
Convert the query string into a data structure of nested lists
and strings, based on the grouping of whitespace-separated
strings by parentheses and quotes. The 'Near' operator is
inserted between the strings of a quoted group.
The Lexicon is given the opportunity to transform the
data structure. Stemming, wildcards, and translation are
possible Lexicon services.
Finally, the query list is normalized so that it and every
sub-list consist of non-operator strings or lists separated
by operators. This list is evaluated.
"""
# First replace any occurrences of " and not " with " andnot "
s = re.sub(r'(?i)\s+and\s*not\s+', ' andnot ', s)
# Parse parentheses and quotes
q = parse(s)
# Allow the Lexicon to process the query
q = self.getLexicon(self._lexicon).query_hook(q)
# Insert the default operator between any two search terms not
# already joined by an operator.
q = parse2(q, default_operator)
# evaluate the final 'expression'
return self.evaluate(q)
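# Illustrative pipeline for s = '"bob uncle" and (cat or dog)' (the
# shapes are a sketch, assuming the module-level helpers below):
#   parse(s)    -> [['bob', Near, 'uncle'], 'and', ['cat', 'or', 'dog']]
#   parse2(...) -> operator strings replaced by the And/Or/Near markers,
#                  defaults inserted between bare terms
#   evaluate()  -> reduces the nested lists to a single ResultList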
def get_operands(self, q, i):
"""Evaluate and return the left and right operands for an operator"""
try:
left = q[i - 1]
right = q[i + 1]
except IndexError:
raise QueryError, "Malformed query"
operandType = type(left)
if operandType is IntType:
left = self[left]
elif operandType is StringType:
left = self[left]
elif operandType is ListType:
left = self.evaluate(left)
operandType = type(right)
if operandType is IntType:
right = self[right]
elif operandType is StringType:
right = self[right]
elif operandType is ListType:
right = self.evaluate(right)
return (left, right)
def evaluate(self, query):
"""Evaluate a parsed query"""
# Strip off meaningless layers
while isinstance(query, ListType) and len(query) == 1:
query = query[0]
# If it's not a list, assume a string or number
if not isinstance(query, ListType):
return self[query]
# Now we need to loop through the query and reduce
# operators. They are currently evaluated in the following
# order: AndNot -> And -> Or -> Near
i = 0
while (i < len(query)):
if query[i] is AndNot:
left, right = self.get_operands(query, i)
val = left.and_not(right)
query[(i - 1) : (i + 2)] = [ val ]
else: i = i + 1
i = 0
while (i < len(query)):
if query[i] is And:
left, right = self.get_operands(query, i)
val = left & right
query[(i - 1) : (i + 2)] = [ val ]
else: i = i + 1
i = 0
while (i < len(query)):
if query[i] is Or:
left, right = self.get_operands(query, i)
val = left | right
query[(i - 1) : (i + 2)] = [ val ]
else: i = i + 1
i = 0
while (i < len(query)):
if query[i] is Near:
left, right = self.get_operands(query, i)
val = left.near(right)
query[(i - 1) : (i + 2)] = [ val ]
else: i = i + 1
if (len(query) != 1): raise QueryError, "Malformed query"
return query[0]
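# Illustrative reduction (the terms are hypothetical): for the normalized
# query ['bob', AndNot, 'cat', Or, 'dog'] the loops above fire in the
# order AndNot -> And -> Or -> Near:
#   [(bob andnot cat), Or, 'dog']  ->  [(bob andnot cat) | dog]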
def parse(s):
"""Parse parentheses and quotes"""
l = []
tmp = string.lower(s)
p = parens(tmp)
while p is not None:
# Look for quotes in the section of the string before
# the parentheses, then parse the string inside the parens
l = l + quotes(p[0])
l.append(parse(p[1]))
# continue looking through the rest of the string
tmp = p[2]
p = parens(tmp)
return l + quotes(tmp)
def parse2(q, default_operator,
operator_dict={AndNot: AndNot, And: And, Or: Or, Near: Near}):
"""Find operators and operands"""
isop = operator_dict.has_key
i = 0
while i < len(q):
e = q[i]
if isinstance(e, ListType):
q[i] = parse2(e, default_operator)
if i % 2:
q.insert(i, default_operator)
i = i + 1
elif i % 2:
# This element should be an operator
if isop(e):
# Ensure that it is identical, not merely equal.
q[i] = operator_dict[e]
else:
# Insert the default operator.
q.insert(i, default_operator)
i = i + 1
i = i + 1
return q
def parens(s, parens_re=re.compile('[()]').search):
mo = parens_re(s)
if mo is None:
return
open_index = mo.start(0) + 1
paren_count = 0
while mo is not None:
index = mo.start(0)
if s[index] == '(':
paren_count = paren_count + 1
else:
paren_count = paren_count - 1
if paren_count == 0:
return (s[:open_index - 1], s[open_index:index],
s[index + 1:])
if paren_count < 0:
break
mo = parens_re(s, index + 1)
raise QueryError, "Mismatched parentheses"
def quotes(s):
split=string.split
if '"' not in s:
return split(s)
# split up quoted regions
splitted = re.split(r'\s*"\s*', s)
if (len(splitted) % 2) == 0: raise QueryError, "Mismatched quotes"
for i in range(1,len(splitted),2):
# split the quoted region into words
words = splitted[i] = split(splitted[i])
# put the Near (proximity) operator in between quoted words
j = len(words) - 1
while j > 0:
words.insert(j, Near)
j = j - 1
i = len(splitted) - 1
while i >= 0:
# split the non-quoted region into words
splitted[i:i+1] = split(splitted[i])
i = i - 2
return filter(None, splitted)
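# A minimal sketch exercising the module-level parse helpers above; the
# query string is illustrative.
if __name__ == '__main__':
    q = parse('"bob uncle" and (cat or dog)')
    # -> [['bob', Near, 'uncle'], 'and', ['cat', 'or', 'dog']]
    q = parse2(q, Or)
    # -> [['bob', Near, 'uncle'], And, ['cat', Or, 'dog']]
    print q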
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
__doc__='''Collected utilities to support database indexing.
$Id: __init__.py,v 1.10 2002/08/14 21:46:24 mj Exp $'''
__version__='$Revision: 1.10 $'[11:-2]
import warnings
warnings.warn("The usage of the SearchIndex package is deprecated since \
Zope 2.4.\n\
This package is only kept for backwards compatibility for a while\n\
and will go away in a future release.\n\
\n\
Please use instead the re-factored modules in Products/PluginIndexes.\n\
",DeprecationWarning)
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
#############################################################################
import whrandom
def randid(randint=whrandom.randint, choice=whrandom.choice, signs=(-1,1)):
return choice(signs)*randint(1,2000000000)
del whrandom
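# Design note: the default arguments bind whrandom's functions before the
# module name is deleted above, so randid() keeps working after the 'del'.
# A minimal usage sketch (the value shown is illustrative):
#   rid = randid()  # e.g. -1498734089; a nonzero int in +-[1, 2000000000]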