From addcfc682a51d70c394546c9cb34f4d9ed94ca36 Mon Sep 17 00:00:00 2001
From: Andreas Jung <yet@gmx.de>
Date: Tue, 12 Mar 2002 15:30:50 +0000
Subject: [PATCH]       - TextIndex: Enhanced splitter functionality now allows
 the         TextIndex to index numbers, single characters. It is also        
 possible to enable case-sensitive indexing. The new         configuration
 options are available through the addForm         of the Vocabulary object.

---
 doc/CHANGES.txt                               |  7 +++
 .../TextIndex/GlobbingLexicon.py              | 16 +++--
 .../PluginIndexes/TextIndex/Lexicon.py        | 25 ++++----
 .../Splitter/ISO_8859_1_Splitter/__init__.py  |  4 --
 .../Splitter/UnicodeSplitter/__init__.py      |  2 +-
 .../Splitter/ZopeSplitter/__init__.py         |  4 --
 .../TextIndex/Splitter/__init__.py            |  3 +-
 .../PluginIndexes/TextIndex/Vocabulary.py     | 27 ++++++---
 .../TextIndex/dtml/addVocabulary.dtml         | 59 ++++++++++++++++---
 .../TextIndex/dtml/manage_vocab.dtml          | 29 ++++++---
 10 files changed, 121 insertions(+), 55 deletions(-)

diff --git a/doc/CHANGES.txt b/doc/CHANGES.txt
index 0022077f5..df0dfce46 100755
--- a/doc/CHANGES.txt
+++ b/doc/CHANGES.txt
@@ -42,6 +42,13 @@ Zope Changes
       - Nicer formatting for the increasingly tall permissions
         table.
 
+      - TextIndex: Enhanced splitter functionality now allows the
+        TextIndex to index numbers and single characters. It is also
+        possible to enable case-sensitive indexing. The new 
+        configuration options are available through the addForm
+        of the Vocabulary object.
+        
+
     Bugs:
 
       - Collector #32: Use difflib instead of ndiff
diff --git a/lib/python/Products/PluginIndexes/TextIndex/GlobbingLexicon.py b/lib/python/Products/PluginIndexes/TextIndex/GlobbingLexicon.py
index adfaa9201..04ed5e54d 100644
--- a/lib/python/Products/PluginIndexes/TextIndex/GlobbingLexicon.py
+++ b/lib/python/Products/PluginIndexes/TextIndex/GlobbingLexicon.py
@@ -14,7 +14,6 @@
 from Lexicon import Lexicon
 import Splitter
 
-
 import re, string
 
 from BTrees.IIBTree import IISet, union, IITreeSet
@@ -56,9 +55,10 @@ class GlobbingLexicon(Lexicon):
     eow = '$'
 
 
-    def __init__(self,useSplitter=None):
+    def __init__(self,useSplitter=None,extra=None):
         self.clear()
         self.useSplitter = useSplitter
+        self.splitterParams = extra
         self.SplitterFunc = Splitter.getSplitter(self.useSplitter)
 
     def clear(self):
@@ -239,9 +239,16 @@ class GlobbingLexicon(Lexicon):
         ## sense in stemming a globbing lexicon.
 
         try:
-            return self.SplitterFunc(astring,None,encoding)
+            return self.SplitterFunc(
+                    astring, 
+                    words, 
+                    encoding=encoding,
+                    singlechar=self.splitterParams.splitterSingleChars,
+                    indexnumbers=self.splitterParams.splitterIndexNumbers,
+                    casefolding=self.splitterParams.splitterCasefolding
+                    )
         except:
-            return self.SplitterFunc(astring,None)
+            return self.SplitterFunc(astring, words)
 
 
     def createRegex(self, pat):
@@ -269,4 +276,3 @@ class GlobbingLexicon(Lexicon):
 
         return "%s$" % result 
 
-
diff --git a/lib/python/Products/PluginIndexes/TextIndex/Lexicon.py b/lib/python/Products/PluginIndexes/TextIndex/Lexicon.py
index 1091c7a90..e75b31e0b 100644
--- a/lib/python/Products/PluginIndexes/TextIndex/Lexicon.py
+++ b/lib/python/Products/PluginIndexes/TextIndex/Lexicon.py
@@ -41,8 +41,7 @@ class Lexicon(Persistent, Implicit):
     # default for older objects
     stop_syn={}
 
-    def __init__(self, stop_syn=None,useSplitter=None):
-
+    def __init__(self, stop_syn=None,useSplitter=None,extra=None):
 
         self.clear()
         if stop_syn is None:
@@ -52,7 +51,7 @@ class Lexicon(Persistent, Implicit):
     
         self.useSplitter = Splitter.splitterNames[0]
         if useSplitter: self.useSplitter=useSplitter
-
+        self.splitterParams = extra
         self.SplitterFunc = Splitter.getSplitter(self.useSplitter)
 
 
@@ -153,10 +152,17 @@ class Lexicon(Persistent, Implicit):
 
     def Splitter(self, astring, words=None, encoding = "latin1"):
         """ wrap the splitter """
-        if words is None:
-            words = self.stop_syn
+        if words is None: words = self.stop_syn
+
         try:
-            return self.SplitterFunc(astring, words, encoding)
+            return self.SplitterFunc(
+                    astring, 
+                    words, 
+                    encoding=encoding,
+                    singlechar=self.splitterParams.splitterSingleChars,
+                    indexnumbers=self.splitterParams.splitterIndexNumbers,
+                    casefolding=self.splitterParams.splitterCasefolding
+                    )
         except:
             return self.SplitterFunc(astring, words)
 
@@ -164,10 +170,6 @@ class Lexicon(Persistent, Implicit):
     def query_hook(self, q):
         """ we don't want to modify the query cuz we're dumb """
         return q
-        
-
-
-
 
 stop_words=(
     'am', 'ii', 'iii', 'per', 'po', 're', 'a', 'about', 'above', 'across',
@@ -217,6 +219,3 @@ stop_words=(
 stop_word_dict={}
 for word in stop_words: stop_word_dict[word]=None
 
-
-
-
diff --git a/lib/python/Products/PluginIndexes/TextIndex/Splitter/ISO_8859_1_Splitter/__init__.py b/lib/python/Products/PluginIndexes/TextIndex/Splitter/ISO_8859_1_Splitter/__init__.py
index 5dcbcc20b..e69de29bb 100644
--- a/lib/python/Products/PluginIndexes/TextIndex/Splitter/ISO_8859_1_Splitter/__init__.py
+++ b/lib/python/Products/PluginIndexes/TextIndex/Splitter/ISO_8859_1_Splitter/__init__.py
@@ -1,4 +0,0 @@
-from ISO_8859_1_Splitter import ISO_8859_1_Splitter
-
-def Splitter(txt,stopwords=None,encoding='latin1'):
-    return ISO_8859_1_Splitter(txt,stopwords)
diff --git a/lib/python/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/__init__.py b/lib/python/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/__init__.py
index 6ad7659ba..8b1378917 100644
--- a/lib/python/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/__init__.py
+++ b/lib/python/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/__init__.py
@@ -1 +1 @@
-from UnicodeSplitter import UnicodeSplitter as Splitter
+
diff --git a/lib/python/Products/PluginIndexes/TextIndex/Splitter/ZopeSplitter/__init__.py b/lib/python/Products/PluginIndexes/TextIndex/Splitter/ZopeSplitter/__init__.py
index 2e8cb7bc9..e69de29bb 100644
--- a/lib/python/Products/PluginIndexes/TextIndex/Splitter/ZopeSplitter/__init__.py
+++ b/lib/python/Products/PluginIndexes/TextIndex/Splitter/ZopeSplitter/__init__.py
@@ -1,4 +0,0 @@
-from ZopeSplitter import ZopeSplitter 
-
-def Splitter(txt,stopwords={},encoding="latin1"):
-    return ZopeSplitter(txt,stopwords)
diff --git a/lib/python/Products/PluginIndexes/TextIndex/Splitter/__init__.py b/lib/python/Products/PluginIndexes/TextIndex/Splitter/__init__.py
index 5bc589537..65918e1e1 100644
--- a/lib/python/Products/PluginIndexes/TextIndex/Splitter/__init__.py
+++ b/lib/python/Products/PluginIndexes/TextIndex/Splitter/__init__.py
@@ -28,8 +28,7 @@ def getSplitter(name=None):
 
     if not name: name = splitterNames[0] 
     if not vars().has_key(name):
-        exec( "from %s import Splitter as %s" % (name,name))
-
+        exec( "from %s.%s import %s" % (name,name,name))
 
     return vars()[name]
     
diff --git a/lib/python/Products/PluginIndexes/TextIndex/Vocabulary.py b/lib/python/Products/PluginIndexes/TextIndex/Vocabulary.py
index ce4aa1b87..115b82891 100644
--- a/lib/python/Products/PluginIndexes/TextIndex/Vocabulary.py
+++ b/lib/python/Products/PluginIndexes/TextIndex/Vocabulary.py
@@ -23,18 +23,21 @@ from Products.PluginIndexes.TextIndex import Splitter
 
 manage_addVocabularyForm=DTMLFile('dtml/addVocabulary',globals())
 
-def manage_addVocabulary(self, id, title, globbing=None, splitter='', REQUEST=None):
+def manage_addVocabulary(self, id, title, globbing=None, extra=None,
+                         splitter='', REQUEST=None):
     """Add a Vocabulary object
     """
     id=str(id)
     title=str(title)
     if globbing: globbing=1
-    
-    c=Vocabulary(id, title, globbing,splitter)
+
+    c=Vocabulary(id, title, globbing,splitter,extra)
     self._setObject(id, c)
     if REQUEST is not None:
         return self.manage_main(self,REQUEST,update_menu=1)
 
+class _extra: pass
+
 
 class Vocabulary(Item, Persistent, Implicit,
                  AccessControl.Role.RoleManager,
@@ -75,20 +78,28 @@ class Vocabulary(Item, Persistent, Implicit,
     manage_main = DTMLFile('dtml/manage_vocab', globals())
     manage_query = DTMLFile('dtml/vocab_query', globals())
 
-    def __init__(self, id, title='', globbing=None,splitter=None):
+    def __init__(self, id, title='', globbing=None,splitter=None,extra=None):
         """ create the lexicon to manage... """
         self.id = id
         self.title = title
         self.globbing = not not globbing
-            
+
         self.useSplitter = Splitter.splitterNames[0]    
         if splitter:
             self.useSplitter = splitter
 
+        if not extra:
+            extra = _extra()
+            extra.splitterIndexNumbers = 0
+            extra.splitterSingleChars  = 0
+            extra.splitterCasefolding  = 1
+
         if globbing:
-            self.lexicon = GlobbingLexicon.GlobbingLexicon(useSplitter=self.useSplitter)
+            self.lexicon = GlobbingLexicon.GlobbingLexicon(
+                                useSplitter=self.useSplitter,extra=extra)
         else:
-            self.lexicon = Lexicon.Lexicon(stop_word_dict,useSplitter=self.useSplitter)
+            self.lexicon = Lexicon.Lexicon(stop_word_dict,
+                                useSplitter=self.useSplitter,extra=extra)
 
     def getLexicon(self):
         return self.lexicon
@@ -115,8 +126,6 @@ class Vocabulary(Item, Persistent, Implicit,
     def manage_stop_syn(self, stop_syn, REQUEST=None):
         pass
 
-
-
     def insert(self, word=''):
         self.lexicon.set(word)
 
diff --git a/lib/python/Products/PluginIndexes/TextIndex/dtml/addVocabulary.dtml b/lib/python/Products/PluginIndexes/TextIndex/dtml/addVocabulary.dtml
index 245f8a42f..50d6974dd 100644
--- a/lib/python/Products/PluginIndexes/TextIndex/dtml/addVocabulary.dtml
+++ b/lib/python/Products/PluginIndexes/TextIndex/dtml/addVocabulary.dtml
@@ -45,15 +45,56 @@
      </td>
    </tr>
   </dtml-if>
-<tr>
-  <td align="left" valign="top">
-  <div class="form-label">
-  Globbing?
-  </td>
-  <td align="left" valign="top">
-  <input type="checkbox" name="globbing" />
-  </td>
-</tr>
+
+  <tr>
+    <td align="left" valign="top">
+    <div class="form-label">
+    Index numbers
+    </td>
+    <td align="left" valign="top">
+    <select name="extra.splitterIndexNumbers:record:int">
+     <option value="0" selected>no
+     <option value="1">yes
+    </select>
+    </td>
+  </tr>
+
+  <tr>
+    <td align="left" valign="top">
+    <div class="form-label">
+    Index single characters 
+    </td>
+    <td align="left" valign="top">
+    <select name="extra.splitterSingleChars:record:int" >
+     <option value="0" selected>no
+     <option value="1">yes
+    </select>
+    </td>
+  </tr>
+
+  <tr>
+    <td align="left" valign="top">
+    <div class="form-label">
+    Case-insensitive
+    </td>
+    <td align="left" valign="top">
+    <select name="extra.splitterCasefolding:record:int">
+     <option value="0" >no
+     <option value="1" selected>yes
+    </select>
+    </td>
+  </tr>
+
+  <tr>
+    <td align="left" valign="top">
+    <div class="form-label">
+    Globbing?
+    </td>
+    <td align="left" valign="top">
+    <input type="checkbox" name="globbing" />
+    </td>
+  </tr>
+
   <tr>
     <td align="left" valign="top">
     </td>
diff --git a/lib/python/Products/PluginIndexes/TextIndex/dtml/manage_vocab.dtml b/lib/python/Products/PluginIndexes/TextIndex/dtml/manage_vocab.dtml
index de9849753..2f8174ff3 100644
--- a/lib/python/Products/PluginIndexes/TextIndex/dtml/manage_vocab.dtml
+++ b/lib/python/Products/PluginIndexes/TextIndex/dtml/manage_vocab.dtml
@@ -4,15 +4,28 @@
 <dtml-var manage_tabs>
 
 <p class="form-text">
-  <dtml-try> 
-    <dtml-let x="getLexicon().multi_wc"></dtml-let>
-    Globbing is <em>enabled</em>
+
+  <dtml-let lexicon="getLexicon()">
+
+    <dtml-try> <dtml-comment>Probe only: if the multi_wc attribute lookup fails, the except branch reports globbing as disabled.</dtml-comment>
+      <dtml-let x="lexicon.multi_wc"></dtml-let>
+      Globbing is <em>enabled</em>
+      <dtml-except>
+      Globbing is <em>disabled</em>
+    </dtml-try>
+
+    <dtml-if useSplitter>
+     , Splitter is <em><dtml-var useSplitter></em>   
+    </dtml-if>
+
+    <dtml-try>
+     , Index numbers=<dtml-var "lexicon.splitterParams.splitterIndexNumbers">
+     , Case-insensitive=<dtml-var "lexicon.splitterParams.splitterCasefolding">
+     , Index single characters=<dtml-var "lexicon.splitterParams.splitterSingleChars">
     <dtml-except>
-    Globbing is <em>disabled</em>
-  </dtml-try>
-  <dtml-if useSplitter>
-   , Splitter is <em><dtml-var useSplitter></em>   
-  </dtml-if>
+    </dtml-try>
+
+  </dtml-let>
 </p>
 
 
-- 
2.30.9