Commit 87f5ab49 authored by Jim Fulton

Rearranged index method to update result lists in a separate
overridable method.  This is needed to implement a clear method
in a subclass that allows an inverted index to be "cleared" without
actually updating data records.

Made some slight optimizations.
parent 30403796
...@@ -30,7 +30,7 @@ Example usage: ...@@ -30,7 +30,7 @@ Example usage:
print i['blah'] print i['blah']
$Id: InvertedIndex.py,v 1.22 1997/03/22 13:02:17 jim Exp $''' $Id: InvertedIndex.py,v 1.23 1997/03/22 13:32:23 jim Exp $'''
# Copyright # Copyright
# #
# Copyright 1996 Digital Creations, L.C., 910 Princess Anne # Copyright 1996 Digital Creations, L.C., 910 Princess Anne
...@@ -82,6 +82,14 @@ $Id: InvertedIndex.py,v 1.22 1997/03/22 13:02:17 jim Exp $''' ...@@ -82,6 +82,14 @@ $Id: InvertedIndex.py,v 1.22 1997/03/22 13:02:17 jim Exp $'''
# (540) 371-6909 # (540) 371-6909
# #
# $Log: InvertedIndex.py,v $ # $Log: InvertedIndex.py,v $
# Revision 1.23 1997/03/22 13:32:23 jim
# Rearranged index method to update result lists in a separate
# overridable method. This is needed to implement a clear method
# in a subclass that allows an inverted index to be "cleared" without
# actually updating data records.
#
# Made some slight optimizations.
#
# Revision 1.22 1997/03/22 13:02:17 jim # Revision 1.22 1997/03/22 13:02:17 jim
# Finish fixing bug in __or__ that Chris has started to fix. # Finish fixing bug in __or__ that Chris has started to fix.
# #
...@@ -156,10 +164,11 @@ $Id: InvertedIndex.py,v 1.22 1997/03/22 13:02:17 jim Exp $''' ...@@ -156,10 +164,11 @@ $Id: InvertedIndex.py,v 1.22 1997/03/22 13:02:17 jim Exp $'''
# #
# #
# #
__version__='$Revision: 1.22 $'[11:-2] __version__='$Revision: 1.23 $'[11:-2]
import regex, regsub, string, copy import regex, regsub, string, copy
from string import lower
from types import * from types import *
...@@ -477,11 +486,10 @@ class Index: ...@@ -477,11 +486,10 @@ class Index:
import math import math
List = self.list_class
index = self._index_object index = self._index_object
src = regsub.gsub('-[ \t]*\n[ \t]*', '', str(src)) # de-hyphenate src = regsub.gsub('-[ \t]*\n[ \t]*', '', str(src)) # de-hyphenate
src = filter(None, self.split_words(src)) src = map(lower,filter(None, self.split_words(src)))
if (len(src) < 2): if (len(src) < 2):
raise IndexingError, 'cannot index document with fewer than two keywords' raise IndexingError, 'cannot index document with fewer than two keywords'
...@@ -492,7 +500,6 @@ class Index: ...@@ -492,7 +500,6 @@ class Index:
i = -1 i = -1
for s in src: for s in src:
i = i + 1 i = i + 1
s = string.lower(s)
stopword_flag = 0 stopword_flag = 0
while (not stopword_flag): while (not stopword_flag):
...@@ -515,13 +522,18 @@ class Index: ...@@ -515,13 +522,18 @@ class Index:
except KeyError: except KeyError:
d[s] = [ i ] d[s] = [ i ]
for s in d.keys(): addentry=self.addentry
freq = int(10000 * (len(d[s]) / nwords)) for word,positions in d.items():
try: freq = int(10000 * (len(positions) / nwords))
index[s].addentry(srckey, freq, d[s]) addentry(word,srckey,(freq, positions))
except KeyError:
index[s] = List({srckey : (freq, d[s])})
def addentry(self,word,key,data):
    '''\
    Store data under key in the result list kept for word.

    word -- the term whose result list is updated
    key  -- the key to set inside that result list (the index
            method passes the source key of the document)
    data -- the value stored for key (the index method passes a
            (frequency, positions) tuple)

    If the index object has no result list for word yet, a new one
    is created by calling self.list_class() and stored under word
    before the entry is set.

    Kept as a separate method so that subclasses can override how
    result lists are updated.
    '''
    index=self._index_object
    # Only a missing word should create a new result list; any other
    # failure while reading the index must propagate instead of being
    # mistaken for "word not indexed yet".
    try: rl=index[word]
    except KeyError:
        rl=self.list_class()
        index[word]=rl
    rl[key]=data
def __getitem__(self, key): def __getitem__(self, key):
'''\ '''\
...@@ -559,7 +571,7 @@ class Index: ...@@ -559,7 +571,7 @@ class Index:
return reduce(lambda x, y: x | y, Lists) return reduce(lambda x, y: x | y, Lists)
key = string.lower(key) key = lower(key)
while (type(key) == StringType): while (type(key) == StringType):
try: try:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment