Commit ec145751 authored by Tres Seaver

LP #142478: normalize terms passed to ``PLexicon.queryLexicon``

o Use the lexicon's pipeline (e.g., case flattening, stop word removal)

o Forward-port from 2.12 branch.
parent 78a7ee19
...@@ -153,6 +153,9 @@ Features Added ...@@ -153,6 +153,9 @@ Features Added
Bugs Fixed Bugs Fixed
++++++++++ ++++++++++
- LP #142478: normalize terms passed to ``PLexicon.queryLexicon`` using
the lexicon's pipeline (e.g., case flattening, stop word removal).
- LP #143604: Removed top-level database-quota-size from zope.conf, some - LP #143604: Removed top-level database-quota-size from zope.conf, some
storages support a quota option instead. storages support a quota option instead.
......
...@@ -358,7 +358,7 @@ class PLexicon(Lexicon, Implicit, SimpleItem): ...@@ -358,7 +358,7 @@ class PLexicon(Lexicon, Implicit, SimpleItem):
""" """
if words: if words:
wids = [] wids = []
for word in words: for word in self.parseTerms(words):
wids.extend(self.globToWordIds(word)) wids.extend(self.globToWordIds(word))
words = [self.get_word(wid) for wid in wids] words = [self.get_word(wid) for wid in wids]
else: else:
...@@ -384,16 +384,20 @@ class PLexicon(Lexicon, Implicit, SimpleItem): ...@@ -384,16 +384,20 @@ class PLexicon(Lexicon, Implicit, SimpleItem):
columns.append(words[i:i + rows]) columns.append(words[i:i + rows])
i += rows i += rows
return self._queryLexicon(self, REQUEST, info = dict(page=page,
page=page, rows=rows,
rows=rows, cols=cols,
cols=cols, start_word=start+1,
start_word=start+1, end_word=end,
end_word=end, word_count=word_count,
word_count=word_count, page_count=page_count,
page_count=page_count, page_range=xrange(page_count),
page_range=xrange(page_count), page_columns=columns)
page_columns=columns)
if REQUEST is not None:
return self._queryLexicon(self, REQUEST, **info)
return info
security.declareProtected(LexiconMgmtPerm, 'manage_main') security.declareProtected(LexiconMgmtPerm, 'manage_main')
manage_main = DTMLFile('dtml/manageLexicon', globals()) manage_main = DTMLFile('dtml/manageLexicon', globals())
......
...@@ -245,6 +245,7 @@ class ZCIndexTestsBase: ...@@ -245,6 +245,7 @@ class ZCIndexTestsBase:
nbest, total = self.zc_index.query(w) nbest, total = self.zc_index.query(w)
self.assertEqual(total, 0, "did not expect to find %s" % w) self.assertEqual(total, 0, "did not expect to find %s" % w)
class CosineIndexTests(ZCIndexTestsBase, testIndex.CosineIndexTest): class CosineIndexTests(ZCIndexTestsBase, testIndex.CosineIndexTest):
# A fairly involved test of the ranking calculations based on # A fairly involved test of the ranking calculations based on
...@@ -566,13 +567,144 @@ class OkapiQueryTests(QueryTestsBase): ...@@ -566,13 +567,144 @@ class OkapiQueryTests(QueryTestsBase):
class PLexiconTests(unittest.TestCase): class PLexiconTests(unittest.TestCase):
def test_z3interfaces(self): def _getTargetClass(self):
from Products.ZCTextIndex.ZCTextIndex import PLexicon
return PLexicon
def _makeOne(self, id='testing', title='Testing', *pipeline):
    """Build an instance of the class under test.

    ``id`` and ``title`` are forwarded positionally, followed by any
    ``pipeline`` elements.
    """
    klass = self._getTargetClass()
    return klass(id, title, *pipeline)
def test_class_conforms_to_ILexicon(self):
from Products.ZCTextIndex.interfaces import ILexicon from Products.ZCTextIndex.interfaces import ILexicon
from zope.interface.verify import verifyClass
verifyClass(ILexicon, self._getTargetClass())
def test_instance_conforms_to_ILexicon(self):
    """An instance built by ``_makeOne`` must verify against ILexicon."""
    from zope.interface.verify import verifyObject
    from Products.ZCTextIndex.interfaces import ILexicon
    lexicon = self._makeOne()
    verifyObject(ILexicon, lexicon)
def test_class_conforms_to_IZCLexicon(self):
from Products.ZCTextIndex.interfaces import IZCLexicon from Products.ZCTextIndex.interfaces import IZCLexicon
from zope.interface.verify import verifyClass from zope.interface.verify import verifyClass
verifyClass(IZCLexicon, self._getTargetClass())
verifyClass(ILexicon, PLexicon) def test_instance_conforms_to_IZCLexicon(self):
verifyClass(IZCLexicon, PLexicon) from Products.ZCTextIndex.interfaces import IZCLexicon
from zope.interface.verify import verifyObject
verifyObject(IZCLexicon, self._makeOne())
def test_queryLexicon_defaults_empty(self):
    """Querying an empty lexicon without a REQUEST returns an info
    mapping with the default paging values and no words.
    """
    info = self._makeOne().queryLexicon(REQUEST=None, words=None)
    expected = (
        ('page', 0),
        ('rows', 20),
        ('cols', 4),
        ('start_word', 1),
        ('end_word', 0),
        ('word_count', 0),
    )
    for key, value in expected:
        self.assertEqual(info[key], value)
    # page_range is compared as a list so a lazy range object is accepted.
    self.assertEqual(list(info['page_range']), [])
    self.assertEqual(info['page_columns'], [])
def test_queryLexicon_defaults_non_empty(self):
    """With seven indexed words and default paging, all of the words
    are reported on a single page, in a single column.
    """
    vocabulary = 'aaa bbb ccc ddd eee fff ggg'.split()
    lexicon = self._makeOne()
    lexicon.sourceToWordIds(vocabulary)
    info = lexicon.queryLexicon(REQUEST=None, words=None)
    expected = (
        ('page', 0),
        ('rows', 20),
        ('cols', 4),
        ('start_word', 1),
        ('end_word', 7),
        ('word_count', 7),
    )
    for key, value in expected:
        self.assertEqual(info[key], value)
    # page_range is compared as a list so a lazy range object is accepted.
    self.assertEqual(list(info['page_range']), [0])
    self.assertEqual(info['page_columns'], [vocabulary])
def test_queryLexicon_row_breaks(self):
    """With ``rows=4``, seven words stay on one page but are split
    into a column of four followed by a column of three.
    """
    vocabulary = 'aaa bbb ccc ddd eee fff ggg'.split()
    lexicon = self._makeOne()
    lexicon.sourceToWordIds(vocabulary)
    info = lexicon.queryLexicon(REQUEST=None, words=None, rows=4)
    expected = (
        ('page', 0),
        ('rows', 4),
        ('cols', 4),
        ('start_word', 1),
        ('end_word', 7),
        ('word_count', 7),
    )
    for key, value in expected:
        self.assertEqual(info[key], value)
    self.assertEqual(list(info['page_range']), [0])
    first_column, second_column = vocabulary[0:4], vocabulary[4:]
    self.assertEqual(info['page_columns'], [first_column, second_column])
def test_queryLexicon_page_breaks(self):
    """With ``rows=2, cols=2``, seven words span two pages; the first
    page holds words one through four in two columns of two.
    """
    vocabulary = 'aaa bbb ccc ddd eee fff ggg'.split()
    lexicon = self._makeOne()
    lexicon.sourceToWordIds(vocabulary)
    info = lexicon.queryLexicon(REQUEST=None, words=None, rows=2, cols=2)
    expected = (
        ('page', 0),
        ('rows', 2),
        ('cols', 2),
        ('start_word', 1),
        ('end_word', 4),
        ('word_count', 7),
    )
    for key, value in expected:
        self.assertEqual(info[key], value)
    self.assertEqual(list(info['page_range']), [0, 1])
    first_column, second_column = vocabulary[0:2], vocabulary[2:4]
    self.assertEqual(info['page_columns'], [first_column, second_column])
def test_queryLexicon_page_break_not_first(self):
    """Requesting ``page=1`` with ``rows=2, cols=2`` returns the
    second page: words five through seven, in columns of two and one.
    """
    vocabulary = 'aaa bbb ccc ddd eee fff ggg'.split()
    lexicon = self._makeOne()
    lexicon.sourceToWordIds(vocabulary)
    paging = dict(page=1, rows=2, cols=2)
    info = lexicon.queryLexicon(REQUEST=None, words=None, **paging)
    expected = (
        ('page', 1),
        ('rows', 2),
        ('cols', 2),
        ('start_word', 5),
        ('end_word', 7),
        ('word_count', 7),
    )
    for key, value in expected:
        self.assertEqual(info[key], value)
    self.assertEqual(list(info['page_range']), [0, 1])
    first_column, second_column = vocabulary[4:6], vocabulary[6:]
    self.assertEqual(info['page_columns'], [first_column, second_column])
def test_queryLexicon_words_no_globbing(self):
    """Passing two literal words restricts the result to exactly
    those two words.
    """
    vocabulary = 'aaa bbb ccc ddd eee fff ggg'.split()
    lexicon = self._makeOne()
    lexicon.sourceToWordIds(vocabulary)
    query = ['aaa', 'bbb']
    info = lexicon.queryLexicon(REQUEST=None, words=query)
    expected = (
        ('page', 0),
        ('rows', 20),
        ('cols', 4),
        ('start_word', 1),
        ('end_word', 2),
        ('word_count', 2),
    )
    for key, value in expected:
        self.assertEqual(info[key], value)
    self.assertEqual(list(info['page_range']), [0])
    self.assertEqual(info['page_columns'], [['aaa', 'bbb']])
def test_queryLexicon_words_w_globbing(self):
    """Glob patterns in ``words`` are expanded to the matching
    indexed words.
    """
    vocabulary = 'aaa bbb ccc ddd eee fff ggg'.split()
    lexicon = self._makeOne()
    lexicon.sourceToWordIds(vocabulary)
    patterns = ['aa*', 'bbb*']
    info = lexicon.queryLexicon(REQUEST=None, words=patterns)
    expected = (
        ('page', 0),
        ('rows', 20),
        ('cols', 4),
        ('start_word', 1),
        ('end_word', 2),
        ('word_count', 2),
    )
    for key, value in expected:
        self.assertEqual(info[key], value)
    self.assertEqual(list(info['page_range']), [0])
    self.assertEqual(info['page_columns'], [['aaa', 'bbb']])
def test_queryLexicon_uses_pipeline_for_normalization(self):
    """Regression test for LP #142478: with a ``CaseNormalizer`` in
    the pipeline, mixed-case glob patterns still match the lower-case
    indexed words.
    """
    from Products.ZCTextIndex.Lexicon import CaseNormalizer
    vocabulary = 'aaa bbb ccc ddd eee fff ggg'.split()
    lexicon = self._makeOne('test', 'Testing', CaseNormalizer())
    lexicon.sourceToWordIds(vocabulary)
    mixed_case_patterns = ['AA*', 'Bbb*']
    info = lexicon.queryLexicon(REQUEST=None, words=mixed_case_patterns)
    expected = (
        ('page', 0),
        ('rows', 20),
        ('cols', 4),
        ('start_word', 1),
        ('end_word', 2),
        ('word_count', 2),
    )
    for key, value in expected:
        self.assertEqual(info[key], value)
    self.assertEqual(list(info['page_range']), [0])
    self.assertEqual(info['page_columns'], [['aaa', 'bbb']])
def test_suite(): def test_suite():
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment