Commit 86cef76b authored by Andreas Jung

      - Collector #1815: ZCTextIndex accepts (again) sequences of strings to 
        be indexed.
parent dfb00ea7
...@@ -34,6 +34,9 @@ Zope Changes ...@@ -34,6 +34,9 @@ Zope Changes
Bugs fixed Bugs fixed
- Collector #1815: ZCTextIndex accepts (again) sequences of strings to
be indexed.
- Collector #1812: Fixed key error in ZSQL ZMI/Test - Collector #1812: Fixed key error in ZSQL ZMI/Test
- Fixed CMFBTreeFolder for CMF 1.5+ - Fixed CMFBTreeFolder for CMF 1.5+
......
...@@ -68,6 +68,9 @@ class IIndex(Interface.Base): ...@@ -68,6 +68,9 @@ class IIndex(Interface.Base):
"""Add a document with the specified id and text to the index. If a """Add a document with the specified id and text to the index. If a
document by that id already exists, replace its text with the new document by that id already exists, replace its text with the new
text provided text provided
text may be either a string (Unicode or otherwise) or a list
of strings from which to extract the terms under which to
index the source document.
""" """
def unindex_doc(docid): def unindex_doc(docid):
......
...@@ -152,7 +152,14 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem): ...@@ -152,7 +152,14 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
## Pluggable Index APIs ## ## Pluggable Index APIs ##
def index_object(self, documentId, obj, threshold=None): def index_object(self, documentId, obj, threshold=None):
""" wrapper to handle indexing of multiple attributes """ """Wrapper for index_doc() handling indexing of multiple attributes.
Enter the document with the specified documentId in the index
under the terms extracted from the indexed text attributes,
each of which should yield either a string or a list of
strings (Unicode or otherwise) to be passed to index_doc().
"""
# XXX We currently ignore subtransaction threshold
# needed for backward compatibility # needed for backward compatibility
try: fields = self._indexed_attrs try: fields = self._indexed_attrs
...@@ -168,12 +175,22 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem): ...@@ -168,12 +175,22 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
text = text() text = text()
if text is None: if text is None:
continue continue
all_texts.append(text) # To index each attribute separately, we could use the
# following line, but we have preferred to make a single
if all_texts: # call to index_doc() for all attributes together.
return self.index.index_doc(documentId, ' '.join(all_texts)) # res += self.index.index_doc(documentId, text)
else: if text:
return 0 if isinstance(text, (list, tuple, )):
all_texts.extend(text)
else:
all_texts.append(text)
# Check that we're sending only strings
all_texts = filter(lambda text: isinstance(text, basestring), \
all_texts)
if all_texts:
return self.index.index_doc(documentId, all_texts)
return res
def unindex_object(self, docid): def unindex_object(self, docid):
if self.index.has_doc(docid): if self.index.has_doc(docid):
......
...@@ -151,6 +151,29 @@ class ZCIndexTestsBase: ...@@ -151,6 +151,29 @@ class ZCIndexTestsBase:
nbest, total = zc_index.query('foo alpha gamma') nbest, total = zc_index.query('foo alpha gamma')
self.assertEqual(len(nbest), 0) self.assertEqual(len(nbest), 0)
def testListAttributes(self):
    # An indexed attribute may yield either a single string or a
    # list of strings; the terms of both must end up searchable under
    # the same document id.
    lexicon = PLexicon('lexicon', '',
                       Splitter(),
                       CaseNormalizer(),
                       StopWordRemover())
    # Hand the index the lexicon built just above, so the test is
    # self-contained instead of silently relying on self.lexicon.
    caller = LexiconHolder(lexicon)
    zc_index = ZCTextIndex('name',
                           None,
                           caller,
                           self.IndexFactory,
                           'text1,text2',
                           'lexicon')
    # First value is a plain string, second a list of strings
    # (presumably exposed by Indexable2 as text1 and text2).
    doc = Indexable2('Hello Tim',
                     ['Now is the winter of our discontent',
                      'Made glorious summer by this sun of York'])
    zc_index.index_object(1, doc)

    # A term that only occurs inside the list-valued attribute.
    nbest, total = zc_index.query('glorious')
    self.assertEqual(len(nbest), 1)
    # Terms spread across the list attribute and the string attribute.
    nbest, total = zc_index.query('York Tim')
    self.assertEqual(len(nbest), 1)
    # 'Tuesday' occurs nowhere in the document, so nothing may match.
    nbest, total = zc_index.query('Tuesday Tim York')
    self.assertEqual(len(nbest), 0)
def testStopWords(self): def testStopWords(self):
# the only non-stopword is question # the only non-stopword is question
text = ("to be or not to be " text = ("to be or not to be "
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment