Commit bd532bbe authored by Guido van Rossum

QueryParser refactoring step 1: add the lexicon to the constructor args.

parent 97fbb9c9
......@@ -92,8 +92,8 @@ _tokenizer_regex = re.compile(r"""
class QueryParser:
def __init__(self):
pass # This parser has no persistent state
def __init__(self, lexicon):
self._lexicon = lexicon
def parseQuery(self, query):
# Lexical analysis.
......
......@@ -39,14 +39,18 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
__implements__ = PluggableIndexInterface
## Magic class attributes ##
meta_type = 'ZCTextIndex'
manage_options= (
manage_options = (
{'label': 'Settings', 'action': 'manage_main'},
)
query_options = ['query']
## Constructor ##
def __init__(self, id, extra, caller, index_factory=OkapiIndex):
self.id = id
self._fieldname = extra.doc_attr
......@@ -64,13 +68,15 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
self._index_factory = index_factory
self.clear()
## External methods not in the Pluggable Index API ##
def query(self, query, nbest=10):
"""Return pair (mapping from docids to scores, num results).
The num results is the total number of results before trimming
to the nbest results.
"""
tree = QueryParser().parseQuery(query)
tree = QueryParser(self.lexicon).parseQuery(query)
results = tree.executeQuery(self.index)
if results is None:
return [], 0
......@@ -107,7 +113,7 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
if record.keys is None:
return None
query_str = ' '.join(record.keys)
tree = QueryParser().parseQuery(query_str)
tree = QueryParser(self.lexicon).parseQuery(query_str)
results = tree.executeQuery(self.index)
return results, (self._fieldname,)
......@@ -120,10 +126,14 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
get_word = self.lexicon.get_word
return [get_word(wid) for wid in word_ids]
## XXX To which API does this conform? ##
def clear(self):
"""reinitialize the index"""
self.index = self._index_factory(self.lexicon)
## Helper ##
def _get_object_text(self, obj):
x = getattr(obj, self._fieldname)
if callable(x):
......
......@@ -394,7 +394,7 @@ class TextIndex(Persistent):
def query(self, query, nbest=10):
# returns a total hit count and a mapping from docids to scores
parser = QueryParser()
parser = QueryParser(self.lexicon)
tree = parser.parseQuery(query)
results = tree.executeQuery(self.index)
if results is None:
......@@ -404,7 +404,7 @@ class TextIndex(Persistent):
return chooser.getbest(), len(results)
def query_weight(self, query):
parser = QueryParser()
parser = QueryParser(self.lexicon)
tree = parser.parseQuery(query)
terms = tree.terms()
return self.index.query_weight(terms)
......
......@@ -18,6 +18,7 @@ from BTrees.IIBTree import IIBucket
from Products.ZCTextIndex.QueryParser import QueryParser
from Products.ZCTextIndex.ParseTree import ParseError, QueryError
from Products.ZCTextIndex.Lexicon import Lexicon, Splitter
class FauxIndex:
......@@ -34,7 +35,8 @@ class FauxIndex:
class TestQueryEngine(TestCase):
def setUp(self):
self.parser = QueryParser()
self.lexicon = Lexicon(Splitter())
self.parser = QueryParser(self.lexicon)
self.index = FauxIndex()
def compareSet(self, set, dict):
......
......@@ -19,6 +19,7 @@ from Products.ZCTextIndex.QueryParser import QueryParser
from Products.ZCTextIndex.ParseTree import ParseError, ParseTreeNode
from Products.ZCTextIndex.ParseTree import OrNode, AndNode, NotNode
from Products.ZCTextIndex.ParseTree import AtomNode, PhraseNode, GlobNode
from Products.ZCTextIndex.Lexicon import Lexicon, Splitter
class TestQueryParser(TestCase):
......@@ -54,7 +55,8 @@ class TestQueryParser(TestCase):
self.assertRaises(ParseError, self.p.parseQuery, input)
def setUp(self):
self.p = QueryParser()
self.lexicon = Lexicon(Splitter())
self.p = QueryParser(self.lexicon)
def testParseQuery(self):
self.expect("foo", AtomNode("foo"))
......
......@@ -265,7 +265,7 @@ class CosineIndexTests(ZCIndexTestsBase, testIndex.CosineIndexTest):
[(1, 0.19), (2, 0.18), (3, 0.63), (5, 0.22), (6, 0.39)]]
for i in range(len(queries)):
raw = queries[i]
q = QueryParser().parseQuery(raw)
q = QueryParser(self.lexicon).parseQuery(raw)
wq = self.index.query_weight(q.terms())
eq(wq, scaled_int(wqs[i]))
r, n = self.zc_index.query(raw)
......@@ -428,10 +428,11 @@ class QueryTestsBase(testQueryEngine.TestQueryEngine,
extra = Extra()
extra.doc_attr = 'text'
extra.lexicon_id = 'lexicon'
caller = LexiconHolder(Lexicon(Splitter(), CaseNormalizer(),
StopWordRemover()))
self.lexicon = Lexicon(Splitter(), CaseNormalizer(),
StopWordRemover())
caller = LexiconHolder(self.lexicon)
self.zc_index = ZCTextIndex('name', extra, caller, self.IndexFactory)
self.p = self.parser = QueryParser()
self.p = self.parser = QueryParser(self.lexicon)
self.index = self.zc_index.index
self.add_docs()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment