merged r30995 from trunk:

- Collector #1815: ZCTextIndex accepts (again) sequences of strings to be indexed.

merged r30995 from trunk:
- Collector #1815: ZCTextIndex accepts (again) sequences of strings to be indexed.
ae607650 · 733b0fbd · ae607650 · ae607650 · ae607650 · ae607650
Commit ae607650 authored Jul 04, 2005 by
4 changed files
--- a/doc/CHANGES.txt
+++ b/doc/CHANGES.txt
@@ -38,6 +38,9 @@ Zope Changes

    Bugs Fixed

+      - Collector #1815: ZCTextIndex accepts (again) sequences of strings to 
+        be indexed.
+
      - Collector #1812: Fixed key error in ZSQL ZMI/Test

      - Fixed CMFBTreeFolder for CMF 1.5+

--- a/lib/python/Products/ZCTextIndex/IIndex.py
+++ b/lib/python/Products/ZCTextIndex/IIndex.py
@@ -68,6 +68,9 @@ class IIndex(Interface.Base):
        """Add a document with the specified id and text to the index. If a
        document by that id already exists, replace its text with the new
        text provided
+        text  may be either a string (Unicode or otherwise) or a list
+        of strings from which to extract the terms under which to
+        index the source document.
        """

    def unindex_doc(docid):

--- a/lib/python/Products/ZCTextIndex/ZCTextIndex.py
+++ b/lib/python/Products/ZCTextIndex/ZCTextIndex.py
@@ -161,7 +161,14 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
    ## Pluggable Index APIs ##

    def index_object(self, documentId, obj, threshold=None):
-        """ wrapper to handle indexing of multiple attributes """
+        """Wrapper for  index_doc()  handling indexing of multiple attributes.
+
+        Enter the document with the specified documentId in the index
+        under the terms extracted from the indexed text attributes,
+        each of which should yield either a string or a list of
+        strings (Unicode or otherwise) to be passed to index_doc().
+        """
+        # XXX We currently ignore subtransaction threshold

        # needed for backward compatibility
        try: fields = self._indexed_attrs
@@ -177,12 +184,22 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
                text = text()
            if text is None:
                continue
+            # To index each attribute separately, we could use the
+            # following line, but we have preferred to make a single
+            # call to index_doc() for all attributes together.
+            # res += self.index.index_doc(documentId, text)
+            if text:
+                if isinstance(text, (list, tuple, )):
+                    all_texts.extend(text)
+                else:
                    all_texts.append(text)

+        # Check that we're sending only strings
+        all_texts = filter(lambda text: isinstance(text, basestring), \
+                           all_texts)
        if all_texts:
-            return self.index.index_doc(documentId, ' '.join(all_texts))
-        else:
-            return 0
+            return self.index.index_doc(documentId, all_texts)
+        return res

    def unindex_object(self, docid):
        if self.index.has_doc(docid):

--- a/lib/python/Products/ZCTextIndex/tests/testZCTextIndex.py
+++ b/lib/python/Products/ZCTextIndex/tests/testZCTextIndex.py
@@ -156,6 +156,29 @@ class ZCIndexTestsBase:
        nbest, total = zc_index.query('foo alpha gamma')
        self.assertEqual(len(nbest), 0)

+    def testListAttributes(self):
+        # Collector #1815: an indexed attribute may yield a list of strings.
+        # Build the lexicon locally so the index really uses this pipeline.
+        lexicon = PLexicon('lexicon', '', Splitter(), CaseNormalizer(),
+                           StopWordRemover())
+        caller = LexiconHolder(lexicon)
+        zc_index = ZCTextIndex('name', None, caller, self.IndexFactory,
+                               'text1,text2', 'lexicon')
+        # Presumably text1 is the plain string and text2 the list of strings.
+        doc = Indexable2('Hello Tim',
+                         ['Now is the winter of our discontent',
+                          'Made glorious summer by this sun of York'])
+        zc_index.index_object(1, doc)
+        # A word that occurs only inside the list attribute is found.
+        nbest, total = zc_index.query('glorious')
+        self.assertEqual(len(nbest), 1)
+        # Words from the string and from the list match the same document.
+        nbest, total = zc_index.query('York Tim')
+        self.assertEqual(len(nbest), 1)
+        # 'Tuesday' appears in neither attribute, so nothing matches.
+        nbest, total = zc_index.query('Tuesday Tim York')
+        self.assertEqual(len(nbest), 0)
+
    def testStopWords(self):
        # the only non-stopword is question
        text = ("to be or not to be "