Commit bd532bbe authored by Guido van Rossum

QueryParser refactoring step 1: add the lexicon to the constructor args.

parent 97fbb9c9
...@@ -92,8 +92,8 @@ _tokenizer_regex = re.compile(r""" ...@@ -92,8 +92,8 @@ _tokenizer_regex = re.compile(r"""
class QueryParser: class QueryParser:
def __init__(self): def __init__(self, lexicon):
pass # This parser has no persistent state self._lexicon = lexicon
def parseQuery(self, query): def parseQuery(self, query):
# Lexical analysis. # Lexical analysis.
......
...@@ -39,14 +39,18 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem): ...@@ -39,14 +39,18 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
__implements__ = PluggableIndexInterface __implements__ = PluggableIndexInterface
## Magic class attributes ##
meta_type = 'ZCTextIndex' meta_type = 'ZCTextIndex'
manage_options= ( manage_options = (
{'label': 'Settings', 'action': 'manage_main'}, {'label': 'Settings', 'action': 'manage_main'},
) )
query_options = ['query'] query_options = ['query']
## Constructor ##
def __init__(self, id, extra, caller, index_factory=OkapiIndex): def __init__(self, id, extra, caller, index_factory=OkapiIndex):
self.id = id self.id = id
self._fieldname = extra.doc_attr self._fieldname = extra.doc_attr
...@@ -64,13 +68,15 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem): ...@@ -64,13 +68,15 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
self._index_factory = index_factory self._index_factory = index_factory
self.clear() self.clear()
## External methods not in the Pluggable Index API ##
def query(self, query, nbest=10): def query(self, query, nbest=10):
"""Return pair (mapping from docids to scores, num results). """Return pair (mapping from docids to scores, num results).
The num results is the total number of results before trimming The num results is the total number of results before trimming
to the nbest results. to the nbest results.
""" """
tree = QueryParser().parseQuery(query) tree = QueryParser(self.lexicon).parseQuery(query)
results = tree.executeQuery(self.index) results = tree.executeQuery(self.index)
if results is None: if results is None:
return [], 0 return [], 0
...@@ -107,7 +113,7 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem): ...@@ -107,7 +113,7 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
if record.keys is None: if record.keys is None:
return None return None
query_str = ' '.join(record.keys) query_str = ' '.join(record.keys)
tree = QueryParser().parseQuery(query_str) tree = QueryParser(self.lexicon).parseQuery(query_str)
results = tree.executeQuery(self.index) results = tree.executeQuery(self.index)
return results, (self._fieldname,) return results, (self._fieldname,)
...@@ -120,10 +126,14 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem): ...@@ -120,10 +126,14 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
get_word = self.lexicon.get_word get_word = self.lexicon.get_word
return [get_word(wid) for wid in word_ids] return [get_word(wid) for wid in word_ids]
## XXX To which API does this conform? ##
def clear(self): def clear(self):
"""reinitialize the index""" """reinitialize the index"""
self.index = self._index_factory(self.lexicon) self.index = self._index_factory(self.lexicon)
## Helper ##
def _get_object_text(self, obj): def _get_object_text(self, obj):
x = getattr(obj, self._fieldname) x = getattr(obj, self._fieldname)
if callable(x): if callable(x):
......
...@@ -394,7 +394,7 @@ class TextIndex(Persistent): ...@@ -394,7 +394,7 @@ class TextIndex(Persistent):
def query(self, query, nbest=10): def query(self, query, nbest=10):
# returns a total hit count and a mapping from docids to scores # returns a total hit count and a mapping from docids to scores
parser = QueryParser() parser = QueryParser(self.lexicon)
tree = parser.parseQuery(query) tree = parser.parseQuery(query)
results = tree.executeQuery(self.index) results = tree.executeQuery(self.index)
if results is None: if results is None:
...@@ -404,7 +404,7 @@ class TextIndex(Persistent): ...@@ -404,7 +404,7 @@ class TextIndex(Persistent):
return chooser.getbest(), len(results) return chooser.getbest(), len(results)
def query_weight(self, query): def query_weight(self, query):
parser = QueryParser() parser = QueryParser(self.lexicon)
tree = parser.parseQuery(query) tree = parser.parseQuery(query)
terms = tree.terms() terms = tree.terms()
return self.index.query_weight(terms) return self.index.query_weight(terms)
......
...@@ -18,6 +18,7 @@ from BTrees.IIBTree import IIBucket ...@@ -18,6 +18,7 @@ from BTrees.IIBTree import IIBucket
from Products.ZCTextIndex.QueryParser import QueryParser from Products.ZCTextIndex.QueryParser import QueryParser
from Products.ZCTextIndex.ParseTree import ParseError, QueryError from Products.ZCTextIndex.ParseTree import ParseError, QueryError
from Products.ZCTextIndex.Lexicon import Lexicon, Splitter
class FauxIndex: class FauxIndex:
...@@ -34,7 +35,8 @@ class FauxIndex: ...@@ -34,7 +35,8 @@ class FauxIndex:
class TestQueryEngine(TestCase): class TestQueryEngine(TestCase):
def setUp(self): def setUp(self):
self.parser = QueryParser() self.lexicon = Lexicon(Splitter())
self.parser = QueryParser(self.lexicon)
self.index = FauxIndex() self.index = FauxIndex()
def compareSet(self, set, dict): def compareSet(self, set, dict):
......
...@@ -19,6 +19,7 @@ from Products.ZCTextIndex.QueryParser import QueryParser ...@@ -19,6 +19,7 @@ from Products.ZCTextIndex.QueryParser import QueryParser
from Products.ZCTextIndex.ParseTree import ParseError, ParseTreeNode from Products.ZCTextIndex.ParseTree import ParseError, ParseTreeNode
from Products.ZCTextIndex.ParseTree import OrNode, AndNode, NotNode from Products.ZCTextIndex.ParseTree import OrNode, AndNode, NotNode
from Products.ZCTextIndex.ParseTree import AtomNode, PhraseNode, GlobNode from Products.ZCTextIndex.ParseTree import AtomNode, PhraseNode, GlobNode
from Products.ZCTextIndex.Lexicon import Lexicon, Splitter
class TestQueryParser(TestCase): class TestQueryParser(TestCase):
...@@ -54,7 +55,8 @@ class TestQueryParser(TestCase): ...@@ -54,7 +55,8 @@ class TestQueryParser(TestCase):
self.assertRaises(ParseError, self.p.parseQuery, input) self.assertRaises(ParseError, self.p.parseQuery, input)
def setUp(self): def setUp(self):
self.p = QueryParser() self.lexicon = Lexicon(Splitter())
self.p = QueryParser(self.lexicon)
def testParseQuery(self): def testParseQuery(self):
self.expect("foo", AtomNode("foo")) self.expect("foo", AtomNode("foo"))
......
...@@ -265,7 +265,7 @@ class CosineIndexTests(ZCIndexTestsBase, testIndex.CosineIndexTest): ...@@ -265,7 +265,7 @@ class CosineIndexTests(ZCIndexTestsBase, testIndex.CosineIndexTest):
[(1, 0.19), (2, 0.18), (3, 0.63), (5, 0.22), (6, 0.39)]] [(1, 0.19), (2, 0.18), (3, 0.63), (5, 0.22), (6, 0.39)]]
for i in range(len(queries)): for i in range(len(queries)):
raw = queries[i] raw = queries[i]
q = QueryParser().parseQuery(raw) q = QueryParser(self.lexicon).parseQuery(raw)
wq = self.index.query_weight(q.terms()) wq = self.index.query_weight(q.terms())
eq(wq, scaled_int(wqs[i])) eq(wq, scaled_int(wqs[i]))
r, n = self.zc_index.query(raw) r, n = self.zc_index.query(raw)
...@@ -428,10 +428,11 @@ class QueryTestsBase(testQueryEngine.TestQueryEngine, ...@@ -428,10 +428,11 @@ class QueryTestsBase(testQueryEngine.TestQueryEngine,
extra = Extra() extra = Extra()
extra.doc_attr = 'text' extra.doc_attr = 'text'
extra.lexicon_id = 'lexicon' extra.lexicon_id = 'lexicon'
caller = LexiconHolder(Lexicon(Splitter(), CaseNormalizer(), self.lexicon = Lexicon(Splitter(), CaseNormalizer(),
StopWordRemover())) StopWordRemover())
caller = LexiconHolder(self.lexicon)
self.zc_index = ZCTextIndex('name', extra, caller, self.IndexFactory) self.zc_index = ZCTextIndex('name', extra, caller, self.IndexFactory)
self.p = self.parser = QueryParser() self.p = self.parser = QueryParser(self.lexicon)
self.index = self.zc_index.index self.index = self.zc_index.index
self.add_docs() self.add_docs()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment