Commit 3095fce7 authored by Andreas Jung's avatar Andreas Jung

removed 'SearchIndex' package

parent 2dc887a3
...@@ -24,6 +24,8 @@ Zope Changes ...@@ -24,6 +24,8 @@ Zope Changes
Features added Features added
- The obsolete 'SearchIndex' package has been removed
- Traversal now supports a "post traversal hook" that get's run - Traversal now supports a "post traversal hook" that get's run
after traversal finished and the security context is established. after traversal finished and the security context is established.
......
# Nothing to see here (deprecated module).
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
#############################################################################
from Lexicon import Lexicon
from Splitter import Splitter
from UnTextIndex import Or
import re, string
from BTrees.IIBTree import IISet, union, IITreeSet
from BTrees.OIBTree import OIBTree
from BTrees.IOBTree import IOBTree
from BTrees.OOBTree import OOBTree
from randid import randid
class GlobbingLexicon(Lexicon):
    """Lexicon which supports basic globbing function ('*' and '?').

    This lexicon keeps several data structures around that are useful
    for searching.  They are:

      '_lexicon' -- Contains the mapping from word => word_id

      '_inverseLex' -- Contains the mapping from word_id => word

      '_digrams' -- Contains a mapping from digram => set of word_ids

    Before going further, it is necessary to understand what a digram is,
    as it is a core component of the structure of this lexicon.  A digram
    is a two-letter sequence in a word.  For example, the word 'zope'
    would be converted into the digrams::

      ['$z', 'zo', 'op', 'pe', 'e$']

    where the '$' is a word marker.  It is used at the beginning and end
    of the words.  Those digrams are significant.
    """

    multi_wc = '*'
    single_wc = '?'
    eow = '$'

    def __init__(self):
        self.clear()

    def clear(self):
        """Reset the lexicon to three empty BTrees."""
        self._lexicon = OIBTree()
        self._inverseLex = IOBTree()
        self._digrams = OOBTree()

    def _convertBTrees(self, threshold=200):
        """Convert old-style storage to BTrees, including the digram map."""
        Lexicon._convertBTrees(self, threshold)
        if type(self._digrams) is OOBTree:
            return

        from BTrees.convert import convert

        _digrams = self._digrams
        self._digrams = OOBTree()
        self._digrams._p_jar = self._p_jar
        convert(_digrams, self._digrams, threshold, IITreeSet)

    def createDigrams(self, word):
        """Returns a list with the set of digrams in the word."""
        digrams = list(word)
        digrams.append(self.eow)
        last = self.eow

        # Each slot becomes "previous character + current character";
        # the end-of-word marker brackets the word on both sides.
        for i in range(len(digrams)):
            last, digrams[i] = digrams[i], last + digrams[i]

        return digrams

    def getWordId(self, word):
        """Provided 'word', return the matching integer word id.

        A new id is assigned if the word is not in the lexicon yet.
        """
        if self._lexicon.has_key(word):
            return self._lexicon[word]
        else:
            return self.assignWordId(word)

    set = getWordId  # Kludge for old code

    def getWord(self, wid):
        """Return the word stored for 'wid', or None if there is none."""
        return self._inverseLex.get(wid, None)

    def assignWordId(self, word):
        """Assigns a new word id to the provided word, and return it."""
        # Double check it's not in the lexicon already, and if it is, just
        # return it.
        if self._lexicon.has_key(word):
            return self._lexicon[word]

        # Get word id. BBB Backward compat pain.
        inverse = self._inverseLex
        try:
            insert = inverse.insert
        except AttributeError:
            # we have an "old" BTree object
            if inverse:
                wid = inverse.keys()[-1] + 1
            else:
                # Rebind the local as well, so the word is stored in the
                # mapping the lexicon actually keeps rather than in the
                # discarded old object.
                inverse = self._inverseLex = IOBTree()
                wid = 1
            inverse[wid] = word
        else:
            # we have a "new" IOBTree object; insert() returns a false
            # value when the id is already taken, so retry until it fits.
            wid = randid()
            while not insert(wid, word):
                wid = randid()

        self._lexicon[word] = wid

        # Now take all the digrams and insert them into the digram map.
        for digram in self.createDigrams(word):
            wids = self._digrams.get(digram, None)
            if wids is None:
                self._digrams[digram] = wids = IISet()
            wids.insert(wid)

        return wid

    def get(self, pattern):
        """Query the lexicon for words matching a pattern.

        Without wildcards, returns a tuple holding the single matching
        word id, or an empty tuple.  With wildcards, returns an IISet of
        matching word ids, or an empty tuple when no candidate is found.
        """
        wc_set = [self.multi_wc, self.single_wc]

        digrams = []
        globbing = 0
        for i in range(len(pattern)):
            if pattern[i] in wc_set:
                globbing = 1
                continue

            if i == 0:
                digrams.insert(i, self.eow + pattern[i])

            # Look ahead one character.  Running off the end (including
            # for a one-character pattern) yields the end-of-word digram;
            # a following wildcard contributes no digram at all.
            try:
                following = pattern[i + 1]
            except IndexError:
                digrams.append(pattern[i] + self.eow)
            else:
                if following not in wc_set:
                    digrams.append(pattern[i] + following)

        if not globbing:
            result = self._lexicon.get(pattern, None)
            if result is None:
                return ()
            return (result, )

        ## now get all of the intsets that contain the result digrams
        result = None
        for digram in digrams:
            result = union(result, self._digrams.get(digram, None))

        if not result:
            return ()

        ## The union above yields every word that contains at least one
        ## of the pattern's digrams, so it is only a candidate list:
        ## words may share digrams with 'pattern' without matching it.
        ## Filter the candidates with the pattern's regular expression.
        expr = re.compile(self.createRegex(pattern))
        hits = IISet()
        for x in result:
            if expr.match(self._inverseLex[x]):
                hits.insert(x)
        return hits

    def __getitem__(self, word):
        """Same as get(word)."""
        return self.get(word)

    def query_hook(self, q):
        """expand wildcards

        'q' is modified in place (nested lists recursively) and returned:
        every element containing a wildcard is replaced by a list of the
        matching word ids separated by 'Or'.
        """
        ListType = type([])
        i = len(q) - 1
        while i >= 0:
            e = q[i]
            if isinstance(e, ListType):
                self.query_hook(e)
            elif (self.multi_wc in e) or (self.single_wc in e):
                wids = self.get(e)
                words = []
                for wid in wids:
                    if words:
                        words.append(Or)
                    words.append(wid)
                if not words:
                    # if words is empty, return something that will make
                    # textindex's __getitem__ return an empty result list
                    words.append('')
                q[i] = words
            i = i - 1

        return q

    def Splitter(self, astring, words=None):
        """ wrap the splitter """
        ## don't do anything, less efficient but there's not much
        ## sense in stemming a globbing lexicon.
        return Splitter(astring)

    def createRegex(self, pat):
        """Translate a PATTERN to a regular expression.

        There is no way to quote meta-characters.
        """
        # Remove characters that are meaningful in a regex
        # NOTE(review): '[', ']' and '+' are not in the removed set and
        # reach the regular expression unchanged -- confirm whether the
        # splitter guarantees they never occur in a pattern.
        transTable = string.maketrans("", "")
        result = string.translate(pat, transTable,
                                  r'()&|!@#$%^{}\<>.')

        # First, deal with multi-character globbing
        result = string.replace(result, '*', '.*')

        # Next, we need to deal with single-character globbing
        result = string.replace(result, '?', '.')

        return "%s$" % result
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""Simple column indices"""
__version__='$Revision: 1.31 $'[11:-2]
from Persistence import Persistent
from BTrees.OOBTree import OOBTree
from BTrees.IIBTree import IITreeSet
import operator
from Missing import MV
import string
ListType = type([])
StringType = type('s')


def nonEmpty(s):
    """Return 0 for an empty plain string and 1 for everything else.

    Only objects whose exact type is the string type are inspected;
    any other (non-string) value counts as non-empty.
    """
    if type(s) is not StringType:
        return 1
    if s:
        return 1
    return 0
class Index(Persistent):
    """Index object interface

    Maintains '_index', an OOBTree mapping each indexed value to an
    IITreeSet of the integer ids of the items carrying that value.
    """

    isDeprecatedIndex = 1

    def __init__(self, data=None, schema=None, id=None,
                 ignore_ex=None, call_methods=None):
        """Create an index

        The arguments are:

          'data' -- a mapping from integer object ids to objects or
          records,

          'schema' -- a mapping from item name to index into data
          records.  If 'data' is a mapping to objects, then schema
          should be 'None'.

          'id' -- the name of the item attribute to index.  This is
          either an attribute name or a record key.

          'ignore_ex' -- stored on the instance; not consulted by the
          methods of this class.

          'call_methods' -- if true, 'index_item' and 'unindex_item'
          call the looked-up value and use its result as the key.
        """
        # For b/w compatibility, __init__ may be called with zero
        # arguments; the instance is then left without any state.
        if not data==schema==id==ignore_ex==call_methods==None:
            self._data = data
            self._schema = schema
            self.id = id
            self.ignore_ex = ignore_ex
            self.call_methods = call_methods
            self._index = OOBTree()
            self._reindex()

    # for b/w compatibility
    _init = __init__

    def dpHasUniqueValuesFor(self, name):
        ' has unique values for column NAME '
        if name == self.id:
            return 1
        else:
            return 0

    def dpUniqueValues(self, name=None, withLengths=0):
        """\
        returns the unique values for name

        if withLengths is true, returns a sequence of
        tuples of (value, length)

        An empty list is returned when 'name' is given and is not the
        indexed column.  Empty-string keys are left out of the result.
        """
        if name is None:
            name = self.id
        elif name != self.id:
            return []

        if not withLengths:
            return tuple(filter(nonEmpty, self._index.keys()))

        rl = []
        for value in self._index.keys():
            if not nonEmpty(value):
                continue
            rl.append((value, len(self._index[value])))
        return tuple(rl)

    def clear(self):
        """Throw away all index data."""
        self._index = OOBTree()

    def _reindex(self, start=0):
        """Recompute index data for data with ids >= start."""
        index = self._index
        get = index.get

        if not start:
            index.clear()

        key = self.id
        if self._schema is None:
            f = getattr
        else:
            # Records are addressed by position; translate the column
            # name into its position once, outside the loop.
            f = operator.__getitem__
            key = self._schema[key]

        for i, row in self._data.items(start):
            k = f(row, key)

            if k is None or k == MV:
                continue

            ids = get(k)
            if ids is None:
                index[k] = ids = IITreeSet()
            ids.insert(i)

    def index_item(self, i, obj=None):
        """Add item 'i' to the index.

        The value is read from 'obj' when given, otherwise from
        'self._data[i]'.  Items whose value cannot be read, or whose
        value is None or the missing value 'MV', are not indexed.
        """
        index = self._index

        key = self.id
        if (self._schema is None) or (obj is not None):
            f = getattr
        else:
            f = operator.__getitem__
            key = self._schema[key]

        if obj is None:
            obj = self._data[i]

        # Best effort: any failure to read the value means "not indexed".
        try:
            k = f(obj, key)
        except:
            return

        if self.call_methods:
            k = k()

        if k is None or k == MV:
            return

        ids = index.get(k)
        if ids is None:
            index[k] = ids = IITreeSet()
        ids.insert(i)

    def unindex_item(self, i, obj=None):
        """Remove item 'i' from the index.

        The value is read from 'obj' when given, otherwise from
        'self._data[i]'.  Nothing happens when the value cannot be read,
        is None or 'MV', or has no entry in the index.
        """
        index = self._index

        key = self.id
        # NOTE(review): unlike index_item, an explicitly passed 'obj' is
        # still read by record position when a schema is set -- confirm
        # whether that difference is intended.
        if self._schema is None:
            f = getattr
        else:
            f = operator.__getitem__
            key = self._schema[key]

        if obj is None:
            obj = self._data[i]

        # Best effort: any failure to read the value means "nothing to do".
        try:
            k = f(obj, key)
        except:
            return

        if self.call_methods:
            k = k()

        if k is None or k == MV:
            return

        ids = index.get(k)
        if ids is not None:
            ids.remove(i)

    def _apply_index(self, request, cid=''):
        """Apply the index to query parameters given in the argument,
        request

        The argument should be a mapping object.

        If the request does not contain the needed parameters, then
        None is returned.

        If the request contains a parameter with the name of the
        column + '_usage', it is sniffed for information on how to
        handle applying the index.

        Otherwise two objects are returned.  The first object is a
        ResultSet containing the record numbers of the matching
        records.  The second object is a tuple containing the names of
        all data fields used.
        """
        # Neither name is imported at module level, so bring them into
        # scope here; without this the method fails with a NameError.
        from BTrees.IIBTree import IISet, weightedUnion

        id = self.id  # name of the column

        cidid = "%s/%s" % (cid, id)
        has_key = request.has_key
        if has_key(cidid):
            keys = request[cidid]
        elif has_key(id):
            keys = request[id]
        else:
            return None

        if type(keys) is not ListType:
            keys = [keys]

        index = self._index
        r = None
        anyTrue = 0
        opr = None

        if request.has_key(id + '_usage'):
            # see if any usage params are sent to field
            opr = string.split(string.lower(request[id + "_usage"]), ':')
            opr, opr_args = opr[0], opr[1:]

        if opr == "range":
            if 'min' in opr_args:
                lo = min(keys)
            else:
                lo = None
            if 'max' in opr_args:
                hi = max(keys)
            else:
                hi = None

            anyTrue = 1
            try:
                # Test against None so that a falsy upper bound such as
                # 0 is still honoured as a bound.
                if hi is not None:
                    setlist = index.items(lo, hi)
                else:
                    setlist = index.items(lo)
                for k, ids in setlist:
                    w, r = weightedUnion(r, ids)
            except KeyError:
                pass
        else:  # not a range
            get = index.get
            for key in keys:
                if key:
                    anyTrue = 1
                ids = get(key)
                if ids is not None:
                    w, r = weightedUnion(r, ids)

        if r is None:
            # No hit: an empty set if the query was meaningful,
            # otherwise signal "index not applicable".
            if anyTrue:
                r = IISet()
            else:
                return None

        return r, (id,)
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
__doc__=""" Module breaks out Zope specific methods and behavior. In
addition, provides the Lexicon class which defines a word to integer
mapping.
"""
from Splitter import Splitter
from Persistence import Persistent
from Acquisition import Implicit
from BTrees.OIBTree import OIBTree
from BTrees.IOBTree import IOBTree
from BTrees.IIBTree import IISet, IITreeSet
from randid import randid
class Lexicon(Persistent, Implicit):
    """Maps words to word ids and then some

    The Lexicon object is an attempt to abstract vocabularies out of
    Text indexes.  This abstraction is not totally cooked yet, this
    module still includes the parser for the 'Text Index Query
    Language' and a few other hacks.
    """

    # default for older objects
    stop_syn = {}

    def __init__(self, stop_syn=None):
        self.clear()
        if stop_syn is None:
            self.stop_syn = {}
        else:
            self.stop_syn = stop_syn

    def clear(self):
        """Reset the lexicon to two empty BTrees."""
        self._lexicon = OIBTree()
        self._inverseLex = IOBTree()

    def _convertBTrees(self, threshold=200):
        """Convert old-style storage to OIBTree/IOBTree objects."""
        if (type(self._lexicon) is OIBTree and
            type(getattr(self, '_inverseLex', None)) is IOBTree):
            return

        from BTrees.convert import convert

        lexicon = self._lexicon
        self._lexicon = OIBTree()
        self._lexicon._p_jar = self._p_jar
        convert(lexicon, self._lexicon, threshold)

        try:
            inverseLex = self._inverseLex
            self._inverseLex = IOBTree()
        except AttributeError:
            # older lexicons didn't have an inverse lexicon
            self._inverseLex = IOBTree()
            inverseLex = self._inverseLex

        self._inverseLex._p_jar = self._p_jar
        convert(inverseLex, self._inverseLex, threshold)

    def set_stop_syn(self, stop_syn):
        """ pass in a mapping of stopwords and synonyms.  Format is:

        {'word' : [syn1, syn2, ..., synx]}

        Vocabularies do not necessarily need to implement this if their
        splitters do not support stemming or stopping.
        """
        self.stop_syn = stop_syn

    def getWordId(self, word):
        """ return the word id of 'word', assigning one if needed """
        wid = self._lexicon.get(word, None)
        if wid is None:
            wid = self.assignWordId(word)
        return wid

    set = getWordId

    def getWord(self, wid):
        """ post-2.3.1b2 method, will not work with unconverted lexicons """
        return self._inverseLex.get(wid, None)

    def assignWordId(self, word):
        """Assigns a new word id to the provided word and returns it."""
        # First make sure it's not already in there
        if self._lexicon.has_key(word):
            return self._lexicon[word]

        try:
            inverse = self._inverseLex
        except AttributeError:
            # old lexicon without an inverse mapping: rebuild it.  The
            # loop uses its own names so that the 'word' being assigned
            # is not overwritten by the last entry of the lexicon.
            inverse = self._inverseLex = IOBTree()
            for known_word, known_wid in self._lexicon.items():
                inverse[known_wid] = known_word

        # insert() returns a false value when the id is already taken,
        # so keep drawing random ids until one is free.
        wid = randid()
        while not inverse.insert(wid, word):
            wid = randid()

        self._lexicon[intern(word)] = wid
        return wid

    def get(self, key, default=None):
        """Return the matched word against the key.

        The result is an IISet holding the word id found for 'key' (or
        'default' when the key is unknown); it is empty when that value
        is None.
        """
        r = IISet()
        wid = self._lexicon.get(key, default)
        if wid is not None:
            r.insert(wid)
        return r

    def __getitem__(self, key):
        return self.get(key)

    def __len__(self):
        return len(self._lexicon)

    def Splitter(self, astring, words=None):
        """ wrap the splitter """
        if words is None:
            words = self.stop_syn
        return Splitter(astring, words)

    def query_hook(self, q):
        """ we don't want to modify the query cuz we're dumb """
        return q
# Stop words as a flat tuple.  A few entries ('am', 'per', 're') are listed
# twice; that is harmless below because they only end up as dictionary keys.
stop_words=(
    'am', 'ii', 'iii', 'per', 'po', 're', 'a', 'about', 'above', 'across',
    'after', 'afterwards', 'again', 'against', 'all', 'almost', 'alone',
    'along', 'already', 'also', 'although', 'always', 'am', 'among',
    'amongst', 'amoungst', 'amount', 'an', 'and', 'another', 'any',
    'anyhow', 'anyone', 'anything', 'anyway', 'anywhere', 'are', 'around',
    'as', 'at', 'back', 'be', 'became', 'because', 'become', 'becomes',
    'becoming', 'been', 'before', 'beforehand', 'behind', 'being',
    'below', 'beside', 'besides', 'between', 'beyond', 'bill', 'both',
    'bottom', 'but', 'by', 'can', 'cannot', 'cant', 'con', 'could',
    'couldnt', 'cry', 'describe', 'detail', 'do', 'done', 'down', 'due',
    'during', 'each', 'eg', 'eight', 'either', 'eleven', 'else',
    'elsewhere', 'empty', 'enough', 'even', 'ever', 'every', 'everyone',
    'everything', 'everywhere', 'except', 'few', 'fifteen', 'fifty',
    'fill', 'find', 'fire', 'first', 'five', 'for', 'former', 'formerly',
    'forty', 'found', 'four', 'from', 'front', 'full', 'further', 'get',
    'give', 'go', 'had', 'has', 'hasnt', 'have', 'he', 'hence', 'her',
    'here', 'hereafter', 'hereby', 'herein', 'hereupon', 'hers',
    'herself', 'him', 'himself', 'his', 'how', 'however', 'hundred', 'i',
    'ie', 'if', 'in', 'inc', 'indeed', 'interest', 'into', 'is', 'it',
    'its', 'itself', 'keep', 'last', 'latter', 'latterly', 'least',
    'less', 'made', 'many', 'may', 'me', 'meanwhile', 'might', 'mill',
    'mine', 'more', 'moreover', 'most', 'mostly', 'move', 'much', 'must',
    'my', 'myself', 'name', 'namely', 'neither', 'never', 'nevertheless',
    'next', 'nine', 'no', 'nobody', 'none', 'noone', 'nor', 'not',
    'nothing', 'now', 'nowhere', 'of', 'off', 'often', 'on', 'once',
    'one', 'only', 'onto', 'or', 'other', 'others', 'otherwise', 'our',
    'ours', 'ourselves', 'out', 'over', 'own', 'per', 'perhaps',
    'please', 'pre', 'put', 'rather', 're', 'same', 'see', 'seem',
    'seemed', 'seeming', 'seems', 'serious', 'several', 'she', 'should',
    'show', 'side', 'since', 'sincere', 'six', 'sixty', 'so', 'some',
    'somehow', 'someone', 'something', 'sometime', 'sometimes',
    'somewhere', 'still', 'such', 'take', 'ten', 'than', 'that', 'the',
    'their', 'them', 'themselves', 'then', 'thence', 'there',
    'thereafter', 'thereby', 'therefore', 'therein', 'thereupon', 'these',
    'they', 'thick', 'thin', 'third', 'this', 'those', 'though', 'three',
    'through', 'throughout', 'thru', 'thus', 'to', 'together', 'too',
    'toward', 'towards', 'twelve', 'twenty', 'two', 'un', 'under',
    'until', 'up', 'upon', 'us', 'very', 'via', 'was', 'we', 'well',
    'were', 'what', 'whatever', 'when', 'whence', 'whenever', 'where',
    'whereafter', 'whereas', 'whereby', 'wherein', 'whereupon',
    'wherever', 'whether', 'which', 'while', 'whither', 'who', 'whoever',
    'whole', 'whom', 'whose', 'why', 'will', 'with', 'within', 'without',
    'would', 'yet', 'you', 'your', 'yours', 'yourself', 'yourselves',
    )
# The same words as a dictionary mapping every stop word to None.  The loop
# variable 'word' stays bound at module level after the loop has finished.
stop_word_dict={}
for word in stop_words: stop_word_dict[word]=None
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""Pluggable Index Base Class """
__version__='$Revision: 1.4 $'[11:-2]
import Interface
class PluggableIndex:
    """Base class describing the methods a pluggable index provides.

    Every method here is a stub that does nothing and returns None.
    """

    def getEntryForObject(self, documentId, default=None):
        """Return everything the index holds for the object 'documentId'."""

    def index_object(self, documentId, obj, threshold=None):
        """Index an object.

        'documentId' is the integer ID of the document

        'obj' is the object to be indexed

        'threshold' is the number of words to process between committing
        subtransactions.  If None, subtransactions are disabled
        """

    def unindex_object(self, documentId):
        """Take 'documentId' out of the index."""

    def uniqueValues(self, name=None, withLengths=0):
        """Return the unique values for 'name'.

        When 'withLengths' is true the result is a sequence of
        (value, length) tuples.
        """

    def _apply_index(self, request, cid=''):
        """Apply the index to the query parameters found in 'request'.

        'request' should be a mapping object.

        None is returned when the request does not contain the needed
        parameters.

        A parameter named after the column with "_usage" appended is
        sniffed for information on how to apply the index.

        Otherwise two objects are returned: a ResultSet containing the
        record numbers of the matching records, and a tuple containing
        the names of all data fields used.
        """
# Build an interface object from the PluggableIndex class and record it on the
# class as the interface it implements.
# NOTE(review): the exact behaviour of Interface.impliedInterface is not
# visible here -- confirm against the Interface package.
PluggableIndexInterface = Interface.impliedInterface(PluggableIndex)
PluggableIndex.__implements__ = PluggableIndexInterface
The SearchIndex package has been deprecated since Zope 2.4.
Use the refactored modules in Products/PluginIndexes instead.
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
from BTrees.IIBTree import IIBucket
from BTrees.IIBTree import weightedIntersection, weightedUnion, difference
from BTrees.OOBTree import OOSet, union
class ResultList:
    """Scored search result: an IIBucket of id => score plus the set of
    words that produced it and the index it came from."""

    def __init__(self, d, words, index, TupleType=type(())):
        """Wrap the scores 'd' found for 'words' in 'index'.

        'd' may be an IIBucket, a single (id, score) tuple, or anything
        the IIBucket constructor accepts.  'words' is converted to an
        OOSet unless it already is one.
        """
        self._index = index

        if type(words) is not OOSet:
            words = OOSet(words)
        self._words = words

        if type(d) is TupleType:
            d = IIBucket((d,))
        elif type(d) is not IIBucket:
            d = IIBucket(d)

        self._dict = d
        # Delegate mapping access straight to the bucket.
        self.__getitem__ = d.__getitem__
        try:
            self.__nonzero__ = d.__nonzero__
        except AttributeError:
            # The bucket offers no __nonzero__; the method defined on
            # the class is used instead.
            pass
        self.get = d.get

    def __nonzero__(self):
        return not not self._dict

    def bucket(self):
        return self._dict

    def keys(self):
        return self._dict.keys()

    def has_key(self, key):
        return self._dict.has_key(key)

    def items(self):
        return self._dict.items()

    def __and__(self, x):
        """Return the weighted intersection of both results."""
        return self.__class__(
            weightedIntersection(self._dict, x._dict)[1],
            union(self._words, x._words),
            self._index,
            )

    def and_not(self, x):
        """Return the entries of this result that are not in 'x'."""
        return self.__class__(
            difference(self._dict, x._dict),
            self._words,
            self._index,
            )

    def __or__(self, x):
        """Return the weighted union of both results."""
        return self.__class__(
            weightedUnion(self._dict, x._dict)[1],
            union(self._words, x._words),
            self._index,
            )

    def near(self, x):
        """Return the ids found in both results, scored by proximity.

        For every shared id the smallest distance between a position of
        one of this result's words and a position of one of x's words is
        computed, and the summed score is divided by it.  If no such
        pair of positions exists the lower of the two scores is used.
        """
        result = IIBucket()
        scores = self._dict
        xdict = x._dict
        xhas = xdict.has_key
        positions = self._index.positions
        for docid, score in scores.items():
            if not xhas(docid):
                continue
            # Tag every position with the side it came from (0 = self,
            # 1 = x) and walk all positions in ascending order.
            p = ([(pos, 0) for pos in positions(docid, self._words)] +
                 [(pos, 1) for pos in positions(docid, x._words)])
            p.sort()
            d = lp = 9999
            li = None
            lsrc = None
            for i, src in p:
                # Compare by value, not identity: equal positions must
                # never produce a distance of zero (division below).
                if li is not None and i != li and src != lsrc:
                    d = min(d, i - li)
                li = i
                lsrc = src
            if d == lp:
                score = min(score, xdict[docid])  # synonyms
            else:
                score = (score + xdict[docid]) / d
            result[docid] = score

        return self.__class__(
            result, union(self._words, x._words), self._index)
*shared*
Splitter Splitter.c
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
from UnIndex import UnIndex
from zLOG import LOG, ERROR
from types import StringType
from BTrees.OOBTree import OOSet, difference
class UnKeywordIndex(UnIndex):

    meta_type = 'Keyword Index'

    # The string below follows an assignment, so it is a plain expression
    # statement and not the class docstring; it is kept where it was.
    """Like an UnIndex only it indexes sequences of items
    Searches match any keyword.
    This should have an _apply_index that returns a relevance score
    """

    def index_object(self, documentId, obj, threshold=None):
        """Index the keywords of 'obj' under the integer id 'documentId'.

        The keywords come from the attribute named 'self.id', which is
        called first if it is callable.  A sequence is indexed item by
        item; a plain string is indexed as one single keyword.

        Returns 0 when a new document's keywords cannot be iterated
        over, and 1 otherwise.
        """
        keywords = getattr(obj, self.id, ())
        if callable(keywords):
            keywords = keywords()

        if type(keywords) is StringType:
            keywords = (keywords, )

        previous = self._unindex.get(documentId, None)

        if previous is None:
            # Unknown document: simply add every keyword.
            try:
                for keyword in keywords:
                    self.insertForwardIndexEntry(keyword, documentId)
                self._unindex[documentId] = list(keywords)
            except TypeError:
                return 0
            return 1

        # Known document: work out which keywords went away and which
        # ones are new, and touch only those.
        if type(previous) is not OOSet:
            previous = OOSet(previous)
        current = OOSet(keywords)
        removed = difference(previous, current)
        added = difference(current, previous)

        if not (removed or added):
            return 1

        self._unindex[documentId] = list(current)
        if removed:
            self.unindex_objectKeywords(documentId, removed)
        if added:
            for keyword in added:
                self.insertForwardIndexEntry(keyword, documentId)
        return 1

    def unindex_objectKeywords(self, documentId, keywords):
        """Remove 'documentId' from the forward index entry of every
        keyword in 'keywords'; None means there is nothing to remove."""
        if keywords is None:
            return
        for keyword in keywords:
            self.removeForwardIndexEntry(keyword, documentId)

    def unindex_object(self, documentId):
        """Remove 'documentId' from the index, logging an error if the
        document id is not known."""
        self.unindex_objectKeywords(
            documentId, self._unindex.get(documentId, None))
        try:
            del self._unindex[documentId]
        except KeyError:
            LOG('UnKeywordIndex', ERROR, 'Attempt to unindex nonexistent'
                ' document id %s' % documentId)
This diff is collapsed.
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
__doc__='''Collected utilities to support database indexing.
$Id: __init__.py,v 1.10 2002/08/14 21:46:24 mj Exp $'''
__version__='$Revision: 1.10 $'[11:-2]
import warnings

# Emit the deprecation notice once, when the package is imported.  The
# message is assembled from adjacent string literals, one per output line.
warnings.warn(
    "The usage of the SearchIndex package is deprecated since Zope 2.4.\n"
    "This package is only kept for backwards compatibility for a while\n"
    "and will go away in a future release.\n"
    "\n"
    "Please use instead the re-factored modules in Products/PluginIndexes.\n",
    DeprecationWarning)
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
#############################################################################
# 'whrandom' was deprecated and later removed from the standard library;
# 'random' provides the same randint() and choice() functions.
import random


def randid(randint=random.randint, choice=random.choice, signs=(-1, 1)):
    """Return a random non-zero integer id.

    The magnitude is drawn with 'randint' from 1 to 2000000000 and
    multiplied by a factor picked by 'choice' from 'signs'.  The two
    functions are bound as defaults so the module reference can be
    deleted below.
    """
    return choice(signs) * randint(1, 2000000000)


del random
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment