Commit 92a2995e authored by Jeremy Hylton's avatar Jeremy Hylton

Add an incorrect test of reindexing.

If we update a document and reindex it, ZCTextIndex is currently
broken.  The test passes by virtue of calling unindex_object() after
each update, then calling index_object() again.  We need to fix our
code, and then remove the calls to unindex_object() from the test.

XXX This code causes OkapiIndex to fail because it doesn't expect to
have no wordinfo for a wid.  I tried to fix this in CosineIndex, but I
want Tim to think more about it and try to fix OkapiIndex.
parent d9c0428f
...@@ -7,7 +7,9 @@ from Products.ZCTextIndex.OkapiIndex import OkapiIndex ...@@ -7,7 +7,9 @@ from Products.ZCTextIndex.OkapiIndex import OkapiIndex
from Products.ZCTextIndex.Lexicon import Lexicon, Splitter from Products.ZCTextIndex.Lexicon import Lexicon, Splitter
from Products.ZCTextIndex.Lexicon import CaseNormalizer, StopWordRemover from Products.ZCTextIndex.Lexicon import CaseNormalizer, StopWordRemover
from Products.ZCTextIndex.QueryParser import QueryParser from Products.ZCTextIndex.QueryParser import QueryParser
from Products.ZCTextIndex.StopDict import get_stopdict
import re
import unittest import unittest
class Indexable: class Indexable:
...@@ -33,6 +35,43 @@ def eq(scaled1, scaled2, epsilon=scaled_int(0.01)): ...@@ -33,6 +35,43 @@ def eq(scaled1, scaled2, epsilon=scaled_int(0.01)):
# Subclasses should derive from one of testIndex.{CosineIndexTest, # Subclasses should derive from one of testIndex.{CosineIndexTest,
# OkapiIndexTest} too. # OkapiIndexTest} too.
# A series of text chunks to use for the re-index tests.  testDocUpdate
# indexes each chunk in turn under a single docid, i.e. it treats the
# chunks as successive versions of one document.
text = [
"""Here's a knocking indeed! If a
man were porter of hell-gate, he should have
old turning the key.""",
"""Knock,
knock, knock! Who's there, i' the name of
Beelzebub? Here's a farmer, that hanged
himself on the expectation of plenty: come in
time; have napkins enow about you; here
you'll sweat for't.""",
"""Knock,
knock! Who's there, in the other devil's
name? Faith, here's an equivocator, that could
swear in both the scales against either scale;
who committed treason enough for God's sake,
yet could not equivocate to heaven: O, come
in, equivocator.""",
"""Knock,
knock, knock! Who's there? Faith, here's an
English tailor come hither, for stealing out of
a French hose: come in, tailor; here you may
roast your goose.""",
"""Knock,
knock; never at quiet! What are you? But
this place is too cold for hell. I'll devil-porter
it no further: I had thought to have let in
some of all professions that go the primrose
way to the everlasting bonfire."""
]
class ZCIndexTestsBase: class ZCIndexTestsBase:
def setUp(self): def setUp(self):
...@@ -57,6 +96,42 @@ class ZCIndexTestsBase: ...@@ -57,6 +96,42 @@ class ZCIndexTestsBase:
self.assertEqual(wids, []) self.assertEqual(wids, [])
self.assertEqual(len(self.index._get_undoinfo(1)), 1) self.assertEqual(len(self.index._get_undoinfo(1)), 1)
def testDocUpdate(self):
    # Index every chunk of `text` in turn as a new version of the same
    # document and check what queries see after each update: a word that
    # occurs in every version must be found, and a word that occurs only
    # in some *other* version must not be found.
    docid = 1
    stop = get_stopdict()
    unique = {}  # version index -> words occurring in that version only
    versions_by_word = {}  # word -> indices of the versions containing it
    common = []  # words occurring in every version
    N = len(text)
    for version, i in zip(text, range(N)):
        # use a simple splitter rather than an official one
        words = [w for w in re.split(r"\W+", version.lower())
                 if len(w) > 1 and w not in stop]
        # Record each version at most once per word.  Appending once per
        # occurrence would make the length checks below count occurrences
        # rather than versions: a word repeated inside its only version
        # would not be classed as unique, and a word whose total count
        # happened to equal N would be classed as common even though some
        # version lacks it.
        for w in words:
            seen = versions_by_word.setdefault(w, [])
            if i not in seen:
                seen.append(i)
    for word, seen in versions_by_word.items():
        if len(seen) == 1:
            unique.setdefault(seen[0], []).append(word)
        elif len(seen) == N:
            common.append(word)

    for version, i in zip(text, range(N)):
        doc = Indexable(version)
        self.zc_index.index_object(docid, doc)
        for w in common:
            nbest, total = self.zc_index.query(w)
            self.assertEqual(total, 1, "did not find %s" % w)
        for k, v in unique.items():
            if k == i:
                # these words belong to the version that was just indexed
                continue
            for w in v:
                nbest, total = self.zc_index.query(w)
                self.assertEqual(total, 0, "did not expect to find %s" % w)
        # XXX Workaround: re-indexing an updated document is currently
        # broken, so the old version is unindexed explicitly before the
        # next one is indexed.  Remove this call once reindexing is fixed.
        self.zc_index.unindex_object(docid)
class CosineIndexTests(ZCIndexTestsBase, testIndex.CosineIndexTest): class CosineIndexTests(ZCIndexTestsBase, testIndex.CosineIndexTest):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment