Commit 060f1457 authored by Takeshi Yamamoto

Merged Zope/branches/tyam-unicodeSplitterPatch 104723:104761

parent a5596551
......@@ -76,7 +76,8 @@ class Lexicon(Persistent):
def termToWordIds(self, text):
last = _text2list(text)
for element in self._pipeline:
last = element.process(last)
process = getattr(element, "process_post_glob", element.process)
last = process(last)
wids = []
for word in last:
wids.append(self._wids.get(word, 0))
......
......@@ -94,6 +94,28 @@ class Test(unittest.TestCase):
wids = lexicon.termToWordIds('boxes')
self.assertEqual(wids, [0])
def testTermToWordIdsWithProcess_post_glob(self):
    """Check termToWordIds with a splitter defining process_post_glob.

    The splitter subclass below supplies a process_post_glob hook that
    hands the term back unchanged.  The test expects the lookup of
    'dogs' to produce [3] once 'cats and dogs' has been passed through
    sourceToWordIds.
    """
    class PostGlobSplitter(Splitter):
        def process_post_glob(self, terms):
            # The hook is expected to receive the query term as a
            # one-item list.
            assert terms == ['dogs']
            return ['dogs']

    lexicon = Lexicon(PostGlobSplitter())
    # Presumably registers the source words so that 'dogs' has a word
    # id; the returned ids are not needed here.
    lexicon.sourceToWordIds('cats and dogs')
    self.assertEqual(lexicon.termToWordIds('dogs'), [3])
def testMissingTermToWordIdsWithProcess_post_glob(self):
    """Check termToWordIds when process_post_glob yields an unseen word.

    The splitter subclass below supplies a process_post_glob hook that
    replaces the term 'dogs' with 'fox', a word that does not occur in
    the source text 'cats and dogs'.  The test expects the lookup to
    produce [0].
    """
    class FoxSplitter(Splitter):
        def process_post_glob(self, terms):
            # The hook is expected to receive the query term as a
            # one-item list.
            assert terms == ['dogs']
            return ['fox']

    lexicon = Lexicon(FoxSplitter())
    # Presumably registers the source words; 'fox' is not among them.
    # The returned ids are not needed here.
    lexicon.sourceToWordIds('cats and dogs')
    self.assertEqual(lexicon.termToWordIds('dogs'), [0])
def testOnePipelineElement(self):
lexicon = Lexicon(Splitter(), StupidPipelineElement('dogs', 'fish'))
wids = lexicon.sourceToWordIds('cats and dogs')
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment