Commit 2594a81b authored by Casey Duncan

Okapi index now works with Zope.

Removed QueryParser as a persistent attribute of the ZCTextIndex so that
the parser doesn't need to be persistent (it stores no state); a new
QueryParser is now created for each query instead.

Updated tests. Functionally tested in Zope.
parent b975f42a
......@@ -27,6 +27,9 @@ from Products.ZCTextIndex import WidCode
from Products.ZCTextIndex.SetOps import mass_weightedIntersection, \
mass_weightedUnion
import ZODB
from Persistence import Persistent
# Instead of storing floats, we generally store scaled ints. Binary pickles
# can store those more efficiently. The default SCALE_FACTOR of 1024
# is large enough to get about 3 decimal digits of fractional info, and
......@@ -43,7 +46,7 @@ def scaled_int(f, scale=SCALE_FACTOR):
# expensive.
return int(f * scale + 0.5)
class Index:
class Index(Persistent):
__implements__ = IIndex
......@@ -78,6 +81,10 @@ class Index:
"""Return the number of documents in the index."""
return len(self._docwords)
def get_words(self, docid):
"""Returns the wordids for a given docid"""
return WidCode.decode(self._docwords[docid])
def index_doc(self, docid, text):
wids = self._lexicon.sourceToWordIds(text)
self._doclen[docid] = len(wids)
......@@ -88,6 +95,7 @@ class Index:
self._add_wordinfo(wid, count, docid)
self._docwords[docid] = WidCode.encode(wids)
return len(wids)
def unindex_doc(self, docid):
for wid in WidCode.decode(self._docwords[docid]):
......
......@@ -62,7 +62,6 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
self.lexicon = lexicon
self.index = index_factory(self.lexicon)
self.parser = QueryParser()
## Pluggable Index APIs ##
......@@ -89,13 +88,13 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
if record.keys is None:
return None
query_str = ' '.join(record.keys)
tree = self.parser.parseQuery(query_str)
tree = QueryParser().parseQuery(query_str)
results = tree.executeQuery(self.index)
return results, (self._fieldname,)
def query(self, query, nbest=10):
# returns a mapping from docids to scores
tree = self.parser.parseQuery(query)
tree = QueryParser().parseQuery(query)
results = tree.executeQuery(self.index)
chooser = NBest(nbest)
chooser.addmany(results.items())
......
......@@ -4,6 +4,7 @@ from Products.ZCTextIndex.tests \
from Products.ZCTextIndex.Index import scaled_int, SCALE_FACTOR, Index
from Products.ZCTextIndex.Lexicon import Lexicon, Splitter
from Products.ZCTextIndex.Lexicon import CaseNormalizer, StopWordRemover
from Products.ZCTextIndex.QueryParser import QueryParser
import unittest
......@@ -110,7 +111,7 @@ class IndexTests(testIndex.IndexTest):
[(1, 0.19), (2, 0.18), (3, 0.63), (5, 0.22), (6, 0.39)]]
for i in range(len(queries)):
raw = queries[i]
q = self.zc_index.parser.parseQuery(raw)
q = QueryParser().parseQuery(raw)
wq = self.index.query_weight(q.terms())
eq(wq, scaled_int(wqs[i]))
r = self.zc_index.query(raw)
......@@ -142,7 +143,7 @@ class QueryTests(testQueryEngine.TestQueryEngine,
caller = LexiconHolder(Lexicon(Splitter(), CaseNormalizer(),
StopWordRemover()))
self.zc_index = ZCTextIndex('name', extra, caller)
self.p = self.parser = self.zc_index.parser
self.p = self.parser = QueryParser()
self.index = self.zc_index.index
self.add_docs()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment