From addcfc682a51d70c394546c9cb34f4d9ed94ca36 Mon Sep 17 00:00:00 2001 From: Andreas Jung <yet@gmx.de> Date: Tue, 12 Mar 2002 15:30:50 +0000 Subject: [PATCH] - TextIndex: Enhanced splitter functionality now allows the TextIndex to index numbers, single characters. It is also possible to enable case-sensitive indexing. The new configuration options are available through the addForm of the Vocabulary object. --- doc/CHANGES.txt | 7 +++ .../TextIndex/GlobbingLexicon.py | 16 +++-- .../PluginIndexes/TextIndex/Lexicon.py | 25 ++++---- .../Splitter/ISO_8859_1_Splitter/__init__.py | 4 -- .../Splitter/UnicodeSplitter/__init__.py | 2 +- .../Splitter/ZopeSplitter/__init__.py | 4 -- .../TextIndex/Splitter/__init__.py | 3 +- .../PluginIndexes/TextIndex/Vocabulary.py | 27 ++++++--- .../TextIndex/dtml/addVocabulary.dtml | 59 ++++++++++++++++--- .../TextIndex/dtml/manage_vocab.dtml | 29 ++++++--- 10 files changed, 121 insertions(+), 55 deletions(-) diff --git a/doc/CHANGES.txt b/doc/CHANGES.txt index 0022077f5..df0dfce46 100755 --- a/doc/CHANGES.txt +++ b/doc/CHANGES.txt @@ -42,6 +42,13 @@ Zope Changes - Nicer formatting for the increasingly tall permissions table. + - TextIndex: Enhanced splitter functionality now allows the + TextIndex to index numbers, single characters. It is also + possible to enable case-sensitive indexing. The new + configuration options are available through the addForm + of the Vocabulary object. 
+ + Bugs: - Collector #32: Use difflib instead of ndiff diff --git a/lib/python/Products/PluginIndexes/TextIndex/GlobbingLexicon.py b/lib/python/Products/PluginIndexes/TextIndex/GlobbingLexicon.py index adfaa9201..04ed5e54d 100644 --- a/lib/python/Products/PluginIndexes/TextIndex/GlobbingLexicon.py +++ b/lib/python/Products/PluginIndexes/TextIndex/GlobbingLexicon.py @@ -14,7 +14,6 @@ from Lexicon import Lexicon import Splitter - import re, string from BTrees.IIBTree import IISet, union, IITreeSet @@ -56,9 +55,10 @@ class GlobbingLexicon(Lexicon): eow = '$' - def __init__(self,useSplitter=None): + def __init__(self,useSplitter=None,extra=None): self.clear() self.useSplitter = useSplitter + self.splitterParams = extra self.SplitterFunc = Splitter.getSplitter(self.useSplitter) def clear(self): @@ -239,9 +239,16 @@ class GlobbingLexicon(Lexicon): ## sense in stemming a globbing lexicon. try: - return self.SplitterFunc(astring,None,encoding) + return self.SplitterFunc( + astring, + words, + encoding=encoding, + singlechar=self.splitterParams.splitterSingleChars, + indexnumbers=self.splitterParams.splitterIndexNumbers, + casefolding=self.splitterParams.splitterCasefolding + ) except: - return self.SplitterFunc(astring,None) + return self.SplitterFunc(astring, words) def createRegex(self, pat): @@ -269,4 +276,3 @@ class GlobbingLexicon(Lexicon): return "%s$" % result - diff --git a/lib/python/Products/PluginIndexes/TextIndex/Lexicon.py b/lib/python/Products/PluginIndexes/TextIndex/Lexicon.py index 1091c7a90..e75b31e0b 100644 --- a/lib/python/Products/PluginIndexes/TextIndex/Lexicon.py +++ b/lib/python/Products/PluginIndexes/TextIndex/Lexicon.py @@ -41,8 +41,7 @@ class Lexicon(Persistent, Implicit): # default for older objects stop_syn={} - def __init__(self, stop_syn=None,useSplitter=None): - + def __init__(self, stop_syn=None,useSplitter=None,extra=None): self.clear() if stop_syn is None: @@ -52,7 +51,7 @@ class Lexicon(Persistent, Implicit): self.useSplitter = 
Splitter.splitterNames[0] if useSplitter: self.useSplitter=useSplitter - + self.splitterParams = extra self.SplitterFunc = Splitter.getSplitter(self.useSplitter) @@ -153,10 +152,17 @@ class Lexicon(Persistent, Implicit): def Splitter(self, astring, words=None, encoding = "latin1"): """ wrap the splitter """ - if words is None: - words = self.stop_syn + if words is None: words = self.stop_syn + try: - return self.SplitterFunc(astring, words, encoding) + return self.SplitterFunc( + astring, + words, + encoding=encoding, + singlechar=self.splitterParams.splitterSingleChars, + indexnumbers=self.splitterParams.splitterIndexNumbers, + casefolding=self.splitterParams.splitterCasefolding + ) except: return self.SplitterFunc(astring, words) @@ -164,10 +170,6 @@ class Lexicon(Persistent, Implicit): def query_hook(self, q): """ we don't want to modify the query cuz we're dumb """ return q - - - - stop_words=( 'am', 'ii', 'iii', 'per', 'po', 're', 'a', 'about', 'above', 'across', @@ -217,6 +219,3 @@ stop_words=( stop_word_dict={} for word in stop_words: stop_word_dict[word]=None - - - diff --git a/lib/python/Products/PluginIndexes/TextIndex/Splitter/ISO_8859_1_Splitter/__init__.py b/lib/python/Products/PluginIndexes/TextIndex/Splitter/ISO_8859_1_Splitter/__init__.py index 5dcbcc20b..e69de29bb 100644 --- a/lib/python/Products/PluginIndexes/TextIndex/Splitter/ISO_8859_1_Splitter/__init__.py +++ b/lib/python/Products/PluginIndexes/TextIndex/Splitter/ISO_8859_1_Splitter/__init__.py @@ -1,4 +0,0 @@ -from ISO_8859_1_Splitter import ISO_8859_1_Splitter - -def Splitter(txt,stopwords=None,encoding='latin1'): - return ISO_8859_1_Splitter(txt,stopwords) diff --git a/lib/python/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/__init__.py b/lib/python/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/__init__.py index 6ad7659ba..8b1378917 100644 --- a/lib/python/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/__init__.py +++ 
b/lib/python/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/__init__.py @@ -1 +1 @@ -from UnicodeSplitter import UnicodeSplitter as Splitter + diff --git a/lib/python/Products/PluginIndexes/TextIndex/Splitter/ZopeSplitter/__init__.py b/lib/python/Products/PluginIndexes/TextIndex/Splitter/ZopeSplitter/__init__.py index 2e8cb7bc9..e69de29bb 100644 --- a/lib/python/Products/PluginIndexes/TextIndex/Splitter/ZopeSplitter/__init__.py +++ b/lib/python/Products/PluginIndexes/TextIndex/Splitter/ZopeSplitter/__init__.py @@ -1,4 +0,0 @@ -from ZopeSplitter import ZopeSplitter - -def Splitter(txt,stopwords={},encoding="latin1"): - return ZopeSplitter(txt,stopwords) diff --git a/lib/python/Products/PluginIndexes/TextIndex/Splitter/__init__.py b/lib/python/Products/PluginIndexes/TextIndex/Splitter/__init__.py index 5bc589537..65918e1e1 100644 --- a/lib/python/Products/PluginIndexes/TextIndex/Splitter/__init__.py +++ b/lib/python/Products/PluginIndexes/TextIndex/Splitter/__init__.py @@ -28,8 +28,7 @@ def getSplitter(name=None): if not name: name = splitterNames[0] if not vars().has_key(name): - exec( "from %s import Splitter as %s" % (name,name)) - + exec( "from %s.%s import %s" % (name,name,name)) return vars()[name] diff --git a/lib/python/Products/PluginIndexes/TextIndex/Vocabulary.py b/lib/python/Products/PluginIndexes/TextIndex/Vocabulary.py index ce4aa1b87..115b82891 100644 --- a/lib/python/Products/PluginIndexes/TextIndex/Vocabulary.py +++ b/lib/python/Products/PluginIndexes/TextIndex/Vocabulary.py @@ -23,18 +23,21 @@ from Products.PluginIndexes.TextIndex import Splitter manage_addVocabularyForm=DTMLFile('dtml/addVocabulary',globals()) -def manage_addVocabulary(self, id, title, globbing=None, splitter='', REQUEST=None): +def manage_addVocabulary(self, id, title, globbing=None, extra=None, + splitter='', REQUEST=None): """Add a Vocabulary object """ id=str(id) title=str(title) if globbing: globbing=1 - - c=Vocabulary(id, title, globbing,splitter) + + 
c=Vocabulary(id, title, globbing,splitter,extra) self._setObject(id, c) if REQUEST is not None: return self.manage_main(self,REQUEST,update_menu=1) +class _extra: pass + class Vocabulary(Item, Persistent, Implicit, AccessControl.Role.RoleManager, @@ -75,20 +78,28 @@ class Vocabulary(Item, Persistent, Implicit, manage_main = DTMLFile('dtml/manage_vocab', globals()) manage_query = DTMLFile('dtml/vocab_query', globals()) - def __init__(self, id, title='', globbing=None,splitter=None): + def __init__(self, id, title='', globbing=None,splitter=None,extra=None): """ create the lexicon to manage... """ self.id = id self.title = title self.globbing = not not globbing - + self.useSplitter = Splitter.splitterNames[0] if splitter: self.useSplitter = splitter + if not extra: + extra = _extra() + extra.splitterIndexNumbers = 0 + extra.splitterSingleChars = 0 + extra.splitterCasefolding = 1 + if globbing: - self.lexicon = GlobbingLexicon.GlobbingLexicon(useSplitter=self.useSplitter) + self.lexicon = GlobbingLexicon.GlobbingLexicon( + useSplitter=self.useSplitter,extra=extra) else: - self.lexicon = Lexicon.Lexicon(stop_word_dict,useSplitter=self.useSplitter) + self.lexicon = Lexicon.Lexicon(stop_word_dict, + useSplitter=self.useSplitter,extra=extra) def getLexicon(self): return self.lexicon @@ -115,8 +126,6 @@ class Vocabulary(Item, Persistent, Implicit, def manage_stop_syn(self, stop_syn, REQUEST=None): pass - - def insert(self, word=''): self.lexicon.set(word) diff --git a/lib/python/Products/PluginIndexes/TextIndex/dtml/addVocabulary.dtml b/lib/python/Products/PluginIndexes/TextIndex/dtml/addVocabulary.dtml index 245f8a42f..50d6974dd 100644 --- a/lib/python/Products/PluginIndexes/TextIndex/dtml/addVocabulary.dtml +++ b/lib/python/Products/PluginIndexes/TextIndex/dtml/addVocabulary.dtml @@ -45,15 +45,56 @@ </td> </tr> </dtml-if> -<tr> - <td align="left" valign="top"> - <div class="form-label"> - Globbing? 
- </td> - <td align="left" valign="top"> - <input type="checkbox" name="globbing" /> - </td> -</tr> + + <tr> + <td align="left" valign="top"> + <div class="form-label"> + Index numbers + </td> + <td align="left" valign="top"> + <select name="extra.splitterIndexNumbers:record:int"> + <option value="0" selected>no + <option value="1">yes + </select> + </td> + </tr> + + <tr> + <td align="left" valign="top"> + <div class="form-label"> + Index single characters + </td> + <td align="left" valign="top"> + <select name="extra.splitterSingleChars:record:int" > + <option value="0" selected>no + <option value="1">yes + </select> + </td> + </tr> + + <tr> + <td align="left" valign="top"> + <div class="form-label"> + Case-insensitive + </td> + <td align="left" valign="top"> + <select name="extra.splitterCasefolding:record:int"> + <option value="0" >no + <option value="1"selected>yes + </select> + </td> + </tr> + + <tr> + <td align="left" valign="top"> + <div class="form-label"> + globbing? + </td> + <td align="left" valign="top"> + <input type="checkbox" name="globbing" /> + </td> + </tr> + <tr> <td align="left" valign="top"> </td> diff --git a/lib/python/Products/PluginIndexes/TextIndex/dtml/manage_vocab.dtml b/lib/python/Products/PluginIndexes/TextIndex/dtml/manage_vocab.dtml index de9849753..2f8174ff3 100644 --- a/lib/python/Products/PluginIndexes/TextIndex/dtml/manage_vocab.dtml +++ b/lib/python/Products/PluginIndexes/TextIndex/dtml/manage_vocab.dtml @@ -4,15 +4,28 @@ <dtml-var manage_tabs> <p class="form-text"> - <dtml-try> - <dtml-let x="getLexicon().multi_wc"></dtml-let> - Globbing is <em>enabled</em> + + <dtml-let lexicon="getLexicon()"> + + <dtml-try> + <dtml-let x="lexicon().multi_wc"></dtml-let> + Globbing is <em>enabled</em> + <dtml-except> + Globbing is <em>disabled</em> + </dtml-try> + + <dtml-if useSplitter> + , Splitter is <em><dtml-var useSplitter></em> + </dtml-if> + + <dtml-try> + , Index number=<dtml-var "lexicon.splitterParams.splitterIndexNumbers"> + , 
Case-insensitive=<dtml-var "lexicon.splitterParams.splitterCasefolding"> + , Index single characters=<dtml-var "lexicon.splitterParams.splitterSingleChars"> <dtml-except> - Globbing is <em>disabled</em> - </dtml-try> - <dtml-if useSplitter> - , Splitter is <em><dtml-var useSplitter></em> - </dtml-if> + </dtml-try> + + </dtml-let> </p> -- 2.30.9