Fix queries of the form 'extension module C'.

ea795e5a · Guido van Rossum · 9319c8e0
Commit ea795e5a authored May 16, 2002 by Guido van Rossum
7 changed files
--- a/lib/python/Products/ZCTextIndex/CosineIndex.py
+++ b/lib/python/Products/ZCTextIndex/CosineIndex.py
@@ -115,6 +115,10 @@ class CosineIndex(Persistent):

    def search(self, term):
        wids = self._lexicon.termToWordIds(term)
+        if not wids:
+            return None # All docs match
+        if 0 in wids:
+            wids = filter(None, wids)
        return mass_weightedUnion(self._search_wids(wids))

    def search_glob(self, pattern):
@@ -123,6 +127,8 @@ class CosineIndex(Persistent):

    def search_phrase(self, phrase):
        wids = self._lexicon.termToWordIds(phrase)
+        if 0 in wids:
+            return IIBTree()
        hits = mass_weightedIntersection(self._search_wids(wids))
        if not hits:
            return hits
@@ -157,6 +163,8 @@ class CosineIndex(Persistent):
        N = float(len(self._docweight))
        sum = 0.0
        for wid in wids:
+            if wid == 0:
+                continue
            wt = math.log(1.0 + N / len(self._wordinfo[wid]))
            sum += wt ** 2.0
        return scaled_int(math.sqrt(sum))

--- a/lib/python/Products/ZCTextIndex/Lexicon.py
+++ b/lib/python/Products/ZCTextIndex/Lexicon.py
@@ -62,9 +62,7 @@ class Lexicon:
            last = element.process(last)
        wids = []
        for word in last:
-            wid = self._wids.get(word)
-            if wid is not None:
-                wids.append(wid)
+            wids.append(self._wids.get(word, 0))
        return wids

    def get_word(self, wid):

--- a/lib/python/Products/ZCTextIndex/OkapiIndex.py
+++ b/lib/python/Products/ZCTextIndex/OkapiIndex.py
@@ -109,6 +109,10 @@ class OkapiIndex(Persistent):

    def search(self, term):
        wids = self._lexicon.termToWordIds(term)
+        if not wids:
+            return None # All docs match
+        if 0 in wids:
+            wids = filter(None, wids)
        return mass_weightedUnion(self._search_wids(wids))

    def search_glob(self, pattern):
@@ -117,6 +121,8 @@ class OkapiIndex(Persistent):

    def search_phrase(self, phrase):
        wids = self._lexicon.termToWordIds(phrase)
+        if 0 in wids:
+            return IIBTree()
        hits = mass_weightedIntersection(self._search_wids(wids))
        if not hits:
            return hits

--- a/lib/python/Products/ZCTextIndex/SetOps.py
+++ b/lib/python/Products/ZCTextIndex/SetOps.py
@@ -20,10 +20,10 @@ from Products.ZCTextIndex.NBest import NBest

 def mass_weightedIntersection(L):
    "A list of (mapping, weight) pairs -> their weightedIntersection IIBTree."
+    L = [(map, weight) for (map, weight) in L if map is not None]
    if not L:
        return IIBTree()
    # Intersect with smallest first.
-    L = L[:]    # don't mutate the caller's L
    L.sort(lambda x, y: cmp(len(x[0]), len(y[0])))
    x, w = L[0]
    dummy, result = weightedUnion(IIBTree(), x, 1, w)

--- a/lib/python/Products/ZCTextIndex/ZCTextIndex.py
+++ b/lib/python/Products/ZCTextIndex/ZCTextIndex.py
@@ -72,6 +72,8 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
        """
        tree = QueryParser().parseQuery(query)
        results = tree.executeQuery(self.index)
+        if results is None:
+            return [], 0
        chooser = NBest(nbest)
        chooser.addmany(results.items())
        return chooser.getbest(), len(results)

--- a/lib/python/Products/ZCTextIndex/tests/mhindex.py
+++ b/lib/python/Products/ZCTextIndex/tests/mhindex.py
@@ -143,7 +143,7 @@ class Indexer:
                if not text:
                    continue
            try:
-                n, results = self.timequery(text, top + nbest)
+                results, n = self.timequery(text, top + nbest)
            except:
                reportexc()
                text = ""
@@ -163,7 +163,7 @@ class Indexer:
            top += nbest

    def query(self, text, nbest=NBEST, maxlines=MAXLINES):
-        n, results = self.timequery(text, nbest)
+        results, n = self.timequery(text, nbest)
        if not n:
            print "No hits for %r." % text
            return
@@ -173,11 +173,11 @@ class Indexer:
    def timequery(self, text, nbest):
        t0 = time.time()
        c0 = time.clock()
-        n, results = self.index.query(text, nbest)
+        results, n = self.index.query(text, nbest)
        t1 = time.time()
        c1 = time.clock()
        print "[Query time: %.3f real, %.3f user]" % (t1-t0, c1-c0)
-        return n, results
+        return results, n

    def formatresults(self, text, results, maxlines=MAXLINES,
                      lo=0, hi=sys.maxint):
@@ -397,9 +397,11 @@ class TextIndex(Persistent):
        parser = QueryParser()
        tree = parser.parseQuery(query)
        results = tree.executeQuery(self.index)
+        if results is None:
+            return [], 0
        chooser = NBest(nbest)
        chooser.addmany(results.items())
-        return len(results), chooser.getbest()
+        return chooser.getbest(), len(results)

    def query_weight(self, query):
        parser = QueryParser()

--- a/lib/python/Products/ZCTextIndex/tests/testLexicon.py
+++ b/lib/python/Products/ZCTextIndex/tests/testLexicon.py
@@ -76,7 +76,7 @@ class Test(TestCase):
        lexicon = Lexicon(Splitter())
        wids = lexicon.sourceToWordIds('cats and dogs')
        wids = lexicon.termToWordIds('boxes')
-        self.assertEqual(wids, [])
+        self.assertEqual(wids, [0])

    def testOnePipelineElement(self):
        lexicon = Lexicon(Splitter(), StupidPipelineElement('dogs', 'fish'))
@@ -94,7 +94,7 @@ class Test(TestCase):
        lexicon = Lexicon(Splitter())
        wids = lexicon.sourceToWordIds('CATS and dogs')
        wids = lexicon.termToWordIds('cats and dogs')
-        self.assertEqual(wids, [2, 3])
+        self.assertEqual(wids, [0, 2, 3])

    def testTwoElementPipeline(self):
        lexicon = Lexicon(Splitter(),