Commit 4ddbd1b5 authored by Jim Fulton

Fixed several bugs in handling query parsing and proximity search.

parent 2b177725
...@@ -127,8 +127,8 @@ Notes on a new text index design ...@@ -127,8 +127,8 @@ Notes on a new text index design
$Id: TextIndex.py,v 1.5 1997/11/03 15:17:12 jim Exp $''' $Id: TextIndex.py,v 1.6 1997/11/03 18:59:59 jim Exp $'''
__version__='$Revision: 1.5 $'[11:-2] __version__='$Revision: 1.6 $'[11:-2]
from Globals import Persistent from Globals import Persistent
import BTree, IIBTree import BTree, IIBTree
...@@ -318,8 +318,8 @@ class TextIndex(Persistent): ...@@ -318,8 +318,8 @@ class TextIndex(Persistent):
if not key: continue if not key: continue
rr=intSet() rr=intSet()
try: try:
for i in query(key,self).keys(): for i,score in query(key,self).items():
rr.insert(i) if score: rr.insert(i)
except KeyError: pass except KeyError: pass
if r is None: r=rr if r is None: r=rr
else: else:
...@@ -389,7 +389,7 @@ class ResultList: ...@@ -389,7 +389,7 @@ class ResultList:
p=(map(lambda i: (i,0), positions(id,self._words))+ p=(map(lambda i: (i,0), positions(id,self._words))+
map(lambda i: (i,1), positions(id,x._words))) map(lambda i: (i,1), positions(id,x._words)))
p.sort() p.sort()
d=lp=len(p) d=lp=9999
li=None li=None
lsrc=None lsrc=None
for i,src in p: for i,src in p:
...@@ -409,6 +409,7 @@ AndNot = 'andnot' ...@@ -409,6 +409,7 @@ AndNot = 'andnot'
And = 'and' And = 'and'
Or = 'or' Or = 'or'
Near = '...' Near = '...'
QueryError='TextIndex.QueryError'
def query(s, index, default_operator = Or, def query(s, index, default_operator = Or,
ws = (string.whitespace,)): ws = (string.whitespace,)):
...@@ -491,14 +492,18 @@ def quotes(s, ws = (string.whitespace,)): ...@@ -491,14 +492,18 @@ def quotes(s, ws = (string.whitespace,)):
if (len(splitted) > 1): if (len(splitted) > 1):
if ((len(splitted) % 2) == 0): raise QueryError, "Mismatched quotes" if ((len(splitted) % 2) == 0): raise QueryError, "Mismatched quotes"
for i in range(1, len(splitted), 2): for i in range(1,len(splitted),2):
# split the quoted region into words # split the quoted region into words
splitted[i] = filter(None, split(splitted[i])) splitted[i] = filter(None, split(splitted[i]))
# put the Proximity operator in between quoted words # put the Proximity operator in between quoted words
for j in range(1, len(splitted[i])): for j in range(1, len(splitted[i])):
splitted[i][j : j] = [ Near ] splitted[i][j : j] = [ Near ]
for i in range(len(splitted)-1,-1,-2):
# split the non-quoted region into words
splitted[i:i+1] = filter(None, split(splitted[i]))
splitted = filter(None, splitted) splitted = filter(None, splitted)
else: else:
# No quotes, so just split the string into words # No quotes, so just split the string into words
...@@ -506,18 +511,20 @@ def quotes(s, ws = (string.whitespace,)): ...@@ -506,18 +511,20 @@ def quotes(s, ws = (string.whitespace,)):
return splitted return splitted
def get_operands(q, i, index, ListType=type([])): def get_operands(q, i, index, ListType=type([]), StringType=type('')):
'''Evaluate and return the left and right operands for an operator''' '''Evaluate and return the left and right operands for an operator'''
try: try:
left = q[i - 1] left = q[i - 1]
right = q[i + 1] right = q[i + 1]
except IndexError: raise QueryError, "Malformed query" except IndexError: raise QueryError, "Malformed query"
if (type(left) is ListType): left = evaluate(left, index) t=type(left)
else: left=index[left] if t is ListType: left = evaluate(left, index)
elif t is StringType: left=index[left]
if (type(right) is ListType): right = evaluate(right, index)
else: right = index[right] t=type(right)
if t is ListType: right = evaluate(right, index)
elif t is StringType: right=index[right]
return (left, right) return (left, right)
...@@ -594,7 +601,7 @@ stop_words=( ...@@ -594,7 +601,7 @@ stop_words=(
'next', 'nine', 'no', 'nobody', 'none', 'noone', 'nor', 'not', 'next', 'nine', 'no', 'nobody', 'none', 'noone', 'nor', 'not',
'nothing', 'now', 'nowhere', 'of', 'off', 'often', 'on', 'once', 'nothing', 'now', 'nowhere', 'of', 'off', 'often', 'on', 'once',
'one', 'only', 'onto', 'or', 'other', 'others', 'otherwise', 'our', 'one', 'only', 'onto', 'or', 'other', 'others', 'otherwise', 'our',
'ours', 'ourselves', 'out', 'over', 'own', 'part', 'per', 'perhaps', 'ours', 'ourselves', 'out', 'over', 'own', 'per', 'perhaps',
'please', 'pre', 'put', 'rather', 're', 'same', 'see', 'seem', 'please', 'pre', 'put', 'rather', 're', 'same', 'see', 'seem',
'seemed', 'seeming', 'seems', 'serious', 'several', 'she', 'should', 'seemed', 'seeming', 'seems', 'serious', 'several', 'she', 'should',
'show', 'side', 'since', 'sincere', 'six', 'sixty', 'so', 'some', 'show', 'side', 'since', 'sincere', 'six', 'sixty', 'so', 'some',
...@@ -619,6 +626,9 @@ for word in stop_words: stop_word_dict[word]=None ...@@ -619,6 +626,9 @@ for word in stop_words: stop_word_dict[word]=None
############################################################################## ##############################################################################
# #
# $Log: TextIndex.py,v $ # $Log: TextIndex.py,v $
# Revision 1.6 1997/11/03 18:59:59 jim
# Fixed several bugs in handling query parsing and proximity search.
#
# Revision 1.5 1997/11/03 15:17:12 jim # Revision 1.5 1997/11/03 15:17:12 jim
# Updated to use new indexing strategy. Now, no longer store positions # Updated to use new indexing strategy. Now, no longer store positions
# in index, but get them on demand from doc. # in index, but get them on demand from doc.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment