Commit ea795e5a authored by Guido van Rossum

Fix queries of the form 'extension module C'.

parent 9319c8e0
......@@ -115,6 +115,10 @@ class CosineIndex(Persistent):
def search(self, term):
# Map the query term to word ids and return the mass_weightedUnion of the
# per-word results produced by self._search_wids().
wids = self._lexicon.termToWordIds(term)
if not wids:
return None # All docs match
# termToWordIds() reports a word that is missing from the lexicon as
# word id 0; drop those placeholders so only known words are searched.
if 0 in wids:
wids = filter(None, wids)
return mass_weightedUnion(self._search_wids(wids))
def search_glob(self, pattern):
......@@ -123,6 +127,8 @@ class CosineIndex(Persistent):
def search_phrase(self, phrase):
wids = self._lexicon.termToWordIds(phrase)
if 0 in wids:
return IIBTree()
hits = mass_weightedIntersection(self._search_wids(wids))
if not hits:
return hits
......@@ -157,6 +163,8 @@ class CosineIndex(Persistent):
N = float(len(self._docweight))
sum = 0.0
for wid in wids:
if wid == 0:
continue
wt = math.log(1.0 + N / len(self._wordinfo[wid]))
sum += wt ** 2.0
return scaled_int(math.sqrt(sum))
......
......@@ -62,9 +62,7 @@ class Lexicon:
last = element.process(last)
wids = []
for word in last:
wid = self._wids.get(word)
if wid is not None:
wids.append(wid)
wids.append(self._wids.get(word, 0))
return wids
def get_word(self, wid):
......
......@@ -109,6 +109,10 @@ class OkapiIndex(Persistent):
def search(self, term):
# Map the query term to word ids and return the mass_weightedUnion of the
# per-word results produced by self._search_wids().
wids = self._lexicon.termToWordIds(term)
if not wids:
return None # All docs match
# termToWordIds() reports a word that is missing from the lexicon as
# word id 0; drop those placeholders so only known words are searched.
if 0 in wids:
wids = filter(None, wids)
return mass_weightedUnion(self._search_wids(wids))
def search_glob(self, pattern):
......@@ -117,6 +121,8 @@ class OkapiIndex(Persistent):
def search_phrase(self, phrase):
wids = self._lexicon.termToWordIds(phrase)
if 0 in wids:
return IIBTree()
hits = mass_weightedIntersection(self._search_wids(wids))
if not hits:
return hits
......
......@@ -20,10 +20,10 @@ from Products.ZCTextIndex.NBest import NBest
def mass_weightedIntersection(L):
"A list of (mapping, weight) pairs -> their weightedIntersection IIBTree."
L = [(map, weight) for (map, weight) in L if map is not None]
if not L:
return IIBTree()
# Intersect with smallest first.
L = L[:] # don't mutate the caller's L
L.sort(lambda x, y: cmp(len(x[0]), len(y[0])))
x, w = L[0]
dummy, result = weightedUnion(IIBTree(), x, 1, w)
......
......@@ -72,6 +72,8 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
"""
tree = QueryParser().parseQuery(query)
results = tree.executeQuery(self.index)
if results is None:
return [], 0
chooser = NBest(nbest)
chooser.addmany(results.items())
return chooser.getbest(), len(results)
......
......@@ -143,7 +143,7 @@ class Indexer:
if not text:
continue
try:
n, results = self.timequery(text, top + nbest)
results, n = self.timequery(text, top + nbest)
except:
reportexc()
text = ""
......@@ -163,7 +163,7 @@ class Indexer:
top += nbest
def query(self, text, nbest=NBEST, maxlines=MAXLINES):
n, results = self.timequery(text, nbest)
results, n = self.timequery(text, nbest)
if not n:
print "No hits for %r." % text
return
......@@ -173,11 +173,11 @@ class Indexer:
def timequery(self, text, nbest):
t0 = time.time()
c0 = time.clock()
n, results = self.index.query(text, nbest)
results, n = self.index.query(text, nbest)
t1 = time.time()
c1 = time.clock()
print "[Query time: %.3f real, %.3f user]" % (t1-t0, c1-c0)
return n, results
return results, n
def formatresults(self, text, results, maxlines=MAXLINES,
lo=0, hi=sys.maxint):
......@@ -397,9 +397,11 @@ class TextIndex(Persistent):
parser = QueryParser()
tree = parser.parseQuery(query)
results = tree.executeQuery(self.index)
if results is None:
return [], 0
chooser = NBest(nbest)
chooser.addmany(results.items())
return len(results), chooser.getbest()
return chooser.getbest(), len(results)
def query_weight(self, query):
parser = QueryParser()
......
......@@ -76,7 +76,7 @@ class Test(TestCase):
lexicon = Lexicon(Splitter())
wids = lexicon.sourceToWordIds('cats and dogs')
wids = lexicon.termToWordIds('boxes')
self.assertEqual(wids, [])
self.assertEqual(wids, [0])
def testOnePipelineElement(self):
lexicon = Lexicon(Splitter(), StupidPipelineElement('dogs', 'fish'))
......@@ -94,7 +94,7 @@ class Test(TestCase):
lexicon = Lexicon(Splitter())
wids = lexicon.sourceToWordIds('CATS and dogs')
wids = lexicon.termToWordIds('cats and dogs')
self.assertEqual(wids, [2, 3])
self.assertEqual(wids, [0, 2, 3])
def testTwoElementPipeline(self):
lexicon = Lexicon(Splitter(),
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment