added 3 new parameters for all zope splitters

17f69863 · Andreas Jung · fc443b19 · 17f69863 · 17f69863 · 17f69863
Commit 17f69863 authored Jan 09, 2002 by Andreas Jung
4 changed files
--- a/doc/CHANGES.txt
+++ b/doc/CHANGES.txt
@@ -27,6 +27,19 @@ Zope Changes

    Features Added

+      - TextIndex/Splitters: the constructors of all three splitters
+        now accept three new optional parameters:
+
+        'maxlen'=(1-128) -  to specify the maximum length of 
+                            split words
+
+        'singlechar'=(1|0) - allows single characters to be indexed
+
+        'indexnumbers'=(1|0)- allows numbers to be indexed
+
+        The default values of all parameters reflect the standard
+        behaviour.
+
      - Enhancements to utilites/requestprofiler.py:

        Added readstats and writestats features which allow for saves and

--- a/lib/python/Products/PluginIndexes/TextIndex/Splitter/ISO_8859_1_Splitter/src/ISO_8859_1_Splitter.c
+++ b/lib/python/Products/PluginIndexes/TextIndex/Splitter/ISO_8859_1_Splitter/src/ISO_8859_1_Splitter.c
@@ -32,6 +32,9 @@ typedef struct
    PyObject *text, *synstop;
    char *here, *end;
    int index;
+    int allow_single_chars;
+    int index_numbers;
+    int max_len;
 }

 Splitter;
@@ -117,6 +120,32 @@ Splitter_length(Splitter *self)
    return self->index+1;
 }

+
+/* split() -- collect every word produced by next_word() into a new list.
+ * Returns NULL, releasing the partial list, if a step fails. */
+static PyObject *
+Splitter_split(Splitter*self)
+{
+    PyObject *list, *word;
+    int status;
+
+    UNLESS(list = PyList_New(0)) return NULL;
+    Splitter_reset(self);
+
+    while ((word = next_word(self,NULL,NULL)) != NULL) {
+        if (word == Py_None) {      /* None ends the scan */
+            Py_DECREF(word);        /* assumes next_word returns an owned ref */
+            return list;
+        }
+        status = PyList_Append(list,word);  /* append adds its own reference */
+        Py_DECREF(word);
+        if (status < 0) break;
+    }
+    Py_DECREF(list);                /* next_word or the append failed */
+    return NULL;
+}
+
+
 static PyObject *
 Splitter_concat(Splitter *self, PyObject *other)
 {
@@ -155,7 +184,7 @@ check_synstop(Splitter *self, PyObject *word)

    len = PyString_Size(word);

-    if(len < 2)	/* Single-letter words are stop words! */
+    if(len < 2 && ! self->allow_single_chars)	/* Single-letter words are stop words! */
    {
        Py_INCREF(Py_None);
        return Py_None;
@@ -167,7 +196,7 @@ check_synstop(Splitter *self, PyObject *word)
    for (; --len >= 0 && ! isalpha((unsigned char)cword[len]); )

        ;
-    if (len < 0) {
+    if (len < 0 && ! self->index_numbers) {
        Py_INCREF(Py_None);
        return Py_None;
    }
@@ -197,12 +226,11 @@ check_synstop(Splitter *self, PyObject *word)
    return value;		/* Which must be None! */
 }

-#define MAX_WORD 64		/* Words longer than MAX_WORD are stemmed */

 static PyObject *
 next_word(Splitter *self, char **startpos, char **endpos)
 {
-    char wbuf[MAX_WORD];
+    char wbuf[256];
    char *end, *here, *b;
    int i = 0, c;
    PyObject *pyword, *res;
@@ -232,13 +260,13 @@ next_word(Splitter *self, char **startpos, char **endpos)
            if(startpos && i==0)
                *startpos=here;

-            if(i++ < MAX_WORD)
+            if(i++ < self->max_len)
                *b++ = c;

        } else if (i != 0) { /* We've found the end of a word */

-            if(i >= MAX_WORD)
-                i=MAX_WORD; /* "stem" the long word */
+            if(i >= self->max_len)
+                i=self->max_len; /* "stem" the long word */

            UNLESS(pyword = PyString_FromStringAndSize(wbuf, i)) {
                self->here=here;
@@ -282,8 +310,8 @@ next_word(Splitter *self, char **startpos, char **endpos)

    /* We've reached the end of the string */

-    if(i >= MAX_WORD)
-        i=MAX_WORD; /* "stem" the long word */
+    if(i >= self->max_len)
+        i=self->max_len; /* "stem" the long word */

    if (i == 0) {
        /* No words */
@@ -416,6 +444,9 @@ err:

 static struct PyMethodDef Splitter_methods[] =
    {
+        { "split", (PyCFunction)Splitter_split, 0,
+            "split() -- Split the string in one run"
+        },
        { "pos", (PyCFunction)Splitter_pos, 0,
            "pos(index) -- Return the starting and ending position of a token"
        },
@@ -459,7 +490,7 @@ static PyTypeObject SplitterType = {
                                       SplitterType__doc__ /* Documentation string */
                                   };

-static char *splitter_args[]={"doc","synstop","encoding",NULL};
+static char *splitter_args[]={"doc","synstop","encoding","singlechar","indexnumbers","maxlen",NULL};

 static PyObject *
 get_Splitter(PyObject *modinfo, PyObject *args,PyObject *keywds)
@@ -467,8 +498,29 @@ get_Splitter(PyObject *modinfo, PyObject *args,PyObject *keywds)
    Splitter *self;
    PyObject *doc, *synstop = NULL;
    char * encoding="latin1";
+    int single_char = 0;
+    int index_numbers = 0;
+    int max_len=64;
+
+    UNLESS(PyArg_ParseTupleAndKeywords(args,keywds,"O|Osiii",splitter_args,&doc,&synstop,&encoding,&single_char,&index_numbers,&max_len)) return NULL;
+
+
+    if (index_numbers<0 || index_numbers>1) {
+        PyErr_SetString(PyExc_ValueError,"indexnumbers must be 0 or 1");
+        return NULL;
+    }
+
+    if (single_char<0 || single_char>1) {
+        PyErr_SetString(PyExc_ValueError,"singlechar must be 0 or 1");
+        return NULL;
+    }
+
+    if (max_len<1 || max_len>128) {
+        PyErr_SetString(PyExc_ValueError,"maxlen must be between 1 and 128");
+        return NULL;
+    }
+

-    UNLESS(PyArg_ParseTupleAndKeywords(args,keywds,"O|Os",splitter_args,&doc,&synstop,&encoding)) return NULL;

    UNLESS(self = PyObject_NEW(Splitter, &SplitterType)) return NULL;

@@ -484,6 +536,9 @@ get_Splitter(PyObject *modinfo, PyObject *args,PyObject *keywds)
    UNLESS(self->here=PyString_AsString(self->text)) goto err;

    self->end = self->here + PyString_Size(self->text);
+    self->allow_single_chars    = single_char;
+    self->index_numbers         = index_numbers;
+    self->max_len               = max_len;

    self->index = -1;

@@ -498,7 +553,7 @@ err:
 static struct PyMethodDef Splitter_module_methods[] =
    {
        { "ISO_8859_1_Splitter", (PyCFunction)get_Splitter, METH_VARARGS|METH_KEYWORDS,
-            "ISO_8859_1_Splitter(doc[,synstop]) -- Return a word splitter"
+          "ISO_8859_1_Splitter(doc[,synstop][,encoding][,singlechar][,indexnumbers][,maxlen]) -- Return a word splitter"
        },

        { NULL, NULL }
@@ -509,7 +564,7 @@ static char Splitter_module_documentation[] =
    "\n"
    "for use in an inverted index\n"
    "\n"
-    "$Id: ISO_8859_1_Splitter.c,v 1.5 2001/11/28 15:51:04 matt Exp $\n"
+    "$Id: ISO_8859_1_Splitter.c,v 1.6 2002/01/09 15:17:34 andreasjung Exp $\n"
    ;


@@ -518,7 +573,7 @@ void
 initISO_8859_1_Splitter(void)
 {
    PyObject *m, *d;
-    char *rev="$Revision: 1.5 $";
+    char *rev="$Revision: 1.6 $";

    /* Create the module and add the functions */
    initSplitterTrtabs();

--- a/lib/python/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/src/UnicodeSplitter.c
+++ b/lib/python/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/src/UnicodeSplitter.c
@@ -13,8 +13,6 @@

 #include "Python.h"

-#define MAX_WORD 64		/* Words longer than MAX_WORD are stemmed */
-
 #ifndef min
 #define min(a,b) ((a)<(b)?(a):(b))
 #endif
@@ -24,8 +22,12 @@ typedef struct
    PyObject_HEAD
    PyObject *list;
    PyObject *synstop;
+    int max_len;
+    int allow_single_chars;
+    int index_numbers;
 }
 Splitter;
+
 static
 PyUnicodeObject *prepareString(PyUnicodeObject *o);

@@ -34,6 +36,9 @@ static PyObject *checkSynword(Splitter *self, PyObject *word)
    /* Always returns a borrowed reference */
    PyObject *value;

+    if (PyUnicode_GetSize(word)==1 && ! self->allow_single_chars)
+        return Py_None;
+
    if (self->synstop) {
        value = PyDict_GetItem(self->synstop,word);
        if (value != NULL) {
@@ -82,6 +87,14 @@ Splitter_item(Splitter *self, int i)
  return item;
 }

+static PyObject *
+Splitter_split(Splitter *self) {
+    /* split() -- hand the splitter's word list back to the caller. */
+    PyObject *words = self->list;
+    Py_INCREF(words);       /* the caller gets its own reference */
+    return words;
+}
+

 static PyObject *
 Splitter_indexes(Splitter *self, PyObject *args)
@@ -133,6 +146,8 @@ Splitter_pos(Splitter *self, PyObject *args)

 static struct PyMethodDef Splitter_methods[] =
    {
+        { "split", (PyCFunction) Splitter_split, 0,
+          "split() -- Split string in one run" },
        { "indexes", (PyCFunction)Splitter_indexes, METH_VARARGS,
          "indexes(word) -- Return a list of the indexes of word in the sequence",
        },
@@ -198,14 +213,19 @@ static int splitUnicodeString(Splitter *self,PyUnicodeObject *doc)
        register Py_UNICODE ch;

        ch = *s;
-#ifdef DEBUG
-        printf("%d %c %d\n",i,ch,ch);
-        fflush(stdout);
-#endif
+
        if (!inside_word) {
-            if (Py_UNICODE_ISALPHA(ch)) {
-                inside_word=1;
-                start = i;
+            if (self->index_numbers) {
+                if (Py_UNICODE_ISALNUM(ch)) {
+                    inside_word=1;
+                    start = i;
+                }
+
+            } else {
+                if (Py_UNICODE_ISALPHA(ch)) {
+                    inside_word=1;
+                    start = i;
+                }
            }
        } else {

@@ -213,7 +233,7 @@ static int splitUnicodeString(Splitter *self,PyUnicodeObject *doc)
                inside_word = 0;

                word = PySequence_GetSlice((PyObject *)doc1,start,
-                                           min(i, start + MAX_WORD));
+                                           min(i, start + self->max_len));
                if (word==NULL)
                  goto err;

@@ -234,7 +254,7 @@ static int splitUnicodeString(Splitter *self,PyUnicodeObject *doc)

    if (inside_word) {
        word = PySequence_GetSlice((PyObject *)doc1,start,
-                                   min(len, start + MAX_WORD));
+                                   min(len, start + self->max_len));
        if (word==NULL)
          goto err;

@@ -288,7 +308,7 @@ PyUnicodeObject *prepareString(PyUnicodeObject *o)
    return  u;
 }

-static char *splitter_args[]={"doc","synstop","encoding",NULL};
+static char *splitter_args[]={"doc","synstop","encoding","indexnumbers","singlechar","maxlen",NULL};


 static PyObject *
@@ -297,9 +317,11 @@ newSplitter(PyObject *modinfo, PyObject *args,PyObject *keywds)
    Splitter *self=NULL;
    PyObject *doc=NULL, *unicodedoc=NULL,*synstop=NULL;
    char *encoding = "latin1";
+    int index_numbers = 0;
+    int max_len=64;
+    int single_char = 0;

-    if (! (self = PyObject_NEW(Splitter, &SplitterType))) return NULL;
-    if (! (PyArg_ParseTupleAndKeywords(args,keywds,"O|Os",splitter_args,&doc,&synstop,&encoding))) return NULL;
+    if (! (PyArg_ParseTupleAndKeywords(args,keywds,"O|Osiii",splitter_args,&doc,&synstop,&encoding,&index_numbers,&single_char,&max_len))) return NULL;

 #ifdef DEBUG
    puts("got text");
@@ -307,6 +329,21 @@ newSplitter(PyObject *modinfo, PyObject *args,PyObject *keywds)
    fflush(stdout);
 #endif

+    if (index_numbers<0 || index_numbers>1) {
+        PyErr_SetString(PyExc_ValueError,"indexnumbers must be 0 or 1");
+        return NULL;
+    }
+
+    if (single_char<0 || single_char>1) {
+        PyErr_SetString(PyExc_ValueError,"singlechar must be 0 or 1");
+        return NULL;
+    }
+
+    if (max_len<1 || max_len>128) {
+        PyErr_SetString(PyExc_ValueError,"maxlen must be between 1 and 128");
+        return NULL;
+    }
+
    if (PyString_Check(doc)) {

        unicodedoc = PyUnicode_FromEncodedObject(doc,encoding,"strict");
@@ -324,11 +361,17 @@ newSplitter(PyObject *modinfo, PyObject *args,PyObject *keywds)
        return NULL;
    }

+    if (! (self = PyObject_NEW(Splitter, &SplitterType))) return NULL;
+
    if (synstop) {
        self->synstop = synstop;
        Py_INCREF(synstop);
    } else  self->synstop=NULL;

+    self->index_numbers      = index_numbers;
+    self->max_len            = max_len;
+    self->allow_single_chars = single_char;
+
    if ((splitUnicodeString(self,(PyUnicodeObject *)unicodedoc)) < 0)
      goto err;

@@ -344,11 +387,6 @@ err:

 static struct PyMethodDef Splitter_module_methods[] =
    {
-        { "pos", (PyCFunction) Splitter_pos, 0,
-          "pos(index) -- Return the starting and ending position of a token" },
-        { "indexes", (PyCFunction) Splitter_indexes, METH_VARARGS,
-          "indexes(word) -- Return a list of the indexes of word in sequence" },
-   
        { "UnicodeSplitter", (PyCFunction)newSplitter,
          METH_VARARGS|METH_KEYWORDS,
          "UnicodeSplitter(doc[,synstop][,encoding='latin1']) "
@@ -362,7 +400,7 @@ static char Splitter_module_documentation[] =
    "\n"
    "for use in an inverted index\n"
    "\n"
-    "$Id: UnicodeSplitter.c,v 1.12 2001/11/28 15:51:04 matt Exp $\n"
+    "$Id: UnicodeSplitter.c,v 1.13 2002/01/09 15:17:34 andreasjung Exp $\n"
    ;


@@ -370,7 +408,7 @@ void
 initUnicodeSplitter(void)
 {
    PyObject *m, *d;
-    char *rev="$Revision: 1.12 $";
+    char *rev="$Revision: 1.13 $";

    /* Create the module and add the functions */
    m = Py_InitModule4("UnicodeSplitter", Splitter_module_methods,

--- a/lib/python/Products/PluginIndexes/TextIndex/Splitter/ZopeSplitter/src/ZopeSplitter.c
+++ b/lib/python/Products/PluginIndexes/TextIndex/Splitter/ZopeSplitter/src/ZopeSplitter.c
 /*****************************************************************************
-
+ 
  Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
  
  This software is subject to the provisions of the Zope Public License,
@@ -10,6 +10,8 @@
  FOR A PARTICULAR PURPOSE
  
 ****************************************************************************/
+
+
 #include "Python.h"
 #include <ctype.h>

@@ -23,6 +25,9 @@ typedef struct
    PyObject *text, *synstop;
    char *here, *end;
    int index;
+    int allow_single_chars;
+    int index_numbers;
+    int max_len;
 }

 Splitter;
@@ -98,7 +103,7 @@ check_synstop(Splitter *self, PyObject *word)
    cword = PyString_AsString(word);
    len = PyString_Size(word);

-    if(len < 2)	/* Single-letter words are stop words! */
+    if(len < 2 && ! self->allow_single_chars)	/* Single-letter words are stop words! */
    {
        Py_INCREF(Py_None);
        return Py_None;
@@ -110,7 +115,7 @@ check_synstop(Splitter *self, PyObject *word)
    for (; --len >= 0 && ! isalpha((unsigned char)cword[len]); )

        ;
-    if (len < 0) {
+    if (len < 0 && ! self->index_numbers) {
        Py_INCREF(Py_None);
        return Py_None;
    }
@@ -140,12 +145,11 @@ check_synstop(Splitter *self, PyObject *word)
    return value;		/* Which must be None! */
 }

-#define MAX_WORD 64		/* Words longer than MAX_WORD are stemmed */

 static PyObject *
 next_word(Splitter *self, char **startpos, char **endpos)
 {
-    char wbuf[MAX_WORD];
+    char wbuf[256];
    char *end, *here, *b;
    int i = 0, c;
    PyObject *pyword, *res;
@@ -175,13 +179,13 @@ next_word(Splitter *self, char **startpos, char **endpos)
            if(startpos && i==0)
                *startpos=here;

-            if(i++ < MAX_WORD)
+            if(i++ < self->max_len)
                *b++ = c;

        } else if (i != 0) { /* We've found the end of a word */

-            if(i >= MAX_WORD)
-                i=MAX_WORD; /* "stem" the long word */
+            if(i >= self->max_len)
+                i=self->max_len; /* "stem" the long word */

            UNLESS(pyword = PyString_FromStringAndSize(wbuf, i)) {
                self->here=here;
@@ -225,8 +229,8 @@ next_word(Splitter *self, char **startpos, char **endpos)

    /* We've reached the end of the string */

-    if(i >= MAX_WORD)
-        i=MAX_WORD; /* "stem" the long word */
+    if(i >= self->max_len)
+        i=self->max_len; /* "stem" the long word */

    if (i == 0) {
        /* No words */
@@ -274,6 +278,31 @@ Splitter_item(Splitter *self, int i)
    return word;
 }

+
+/* split() -- collect every word produced by next_word() into a new list.
+ * Returns NULL, releasing the partial list, if a step fails. */
+static PyObject *
+Splitter_split(Splitter*self)
+{
+    PyObject *list, *word;
+    int status;
+
+    UNLESS(list = PyList_New(0)) return NULL;
+    Splitter_reset(self);
+
+    while ((word = next_word(self,NULL,NULL)) != NULL) {
+        if (word == Py_None) {      /* None ends the scan */
+            Py_DECREF(word);        /* assumes next_word returns an owned ref */
+            return list;
+        }
+        status = PyList_Append(list,word);  /* append adds its own reference */
+        Py_DECREF(word);
+        if (status < 0) break;
+    }
+    Py_DECREF(list);                /* next_word or the append failed */
+    return NULL;
+}
+
 static PyObject *
 Splitter_slice(Splitter *self, int i, int j)
 {
@@ -282,14 +311,14 @@ Splitter_slice(Splitter *self, int i, int j)
 }

 static PySequenceMethods Splitter_as_sequence = {
-            (inquiry)Splitter_length,        /*sq_length*/
-            (binaryfunc)Splitter_concat,     /*sq_concat*/
-            (intargfunc)Splitter_repeat,     /*sq_repeat*/
-            (intargfunc)Splitter_item,       /*sq_item*/
-            (intintargfunc)Splitter_slice,   /*sq_slice*/
-            (intobjargproc)0,                    /*sq_ass_item*/
-            (intintobjargproc)0,                 /*sq_ass_slice*/
-        };
+    (inquiry)Splitter_length,        /*sq_length*/
+    (binaryfunc)Splitter_concat,     /*sq_concat*/
+    (intargfunc)Splitter_repeat,     /*sq_repeat*/
+    (intargfunc)Splitter_item,       /*sq_item*/
+    (intintargfunc)Splitter_slice,   /*sq_slice*/
+    (intobjargproc)0,                    /*sq_ass_item*/
+    (intintobjargproc)0,                 /*sq_ass_slice*/
+};

 static PyObject *
 Splitter_pos(Splitter *self, PyObject *args)
@@ -359,8 +388,12 @@ err:

 static struct PyMethodDef Splitter_methods[] =
    {
+        { "split", (PyCFunction)Splitter_split, 0,
+            "split() -- Split complete string in one run"
+        },
+
        { "pos", (PyCFunction)Splitter_pos, 0,
-            "pos(index) -- Return the starting and ending position of a token"
+          "pos(index) -- Return the starting and ending position of a token"
        },

        { "indexes", (PyCFunction)Splitter_indexes, METH_VARARGS,
@@ -378,31 +411,31 @@ Splitter_getattr(Splitter *self, char *name)
 static char SplitterType__doc__[] = "";

 static PyTypeObject SplitterType = {
-                                       PyObject_HEAD_INIT(NULL)
-                                       0,                                 /*ob_size*/
-                                       "Splitter",                    /*tp_name*/
-                                       sizeof(Splitter),              /*tp_basicsize*/
-                                       0,                                 /*tp_itemsize*/
-                                       /* methods */
-                                       (destructor)Splitter_dealloc,  /*tp_dealloc*/
-                                       (printfunc)0,                      /*tp_print*/
-                                       (getattrfunc)Splitter_getattr, /*tp_getattr*/
-                                       (setattrfunc)0,                    /*tp_setattr*/
-                                       (cmpfunc)0,                        /*tp_compare*/
-                                       (reprfunc)0,                       /*tp_repr*/
-                                       0,                                 /*tp_as_number*/
-                                       &Splitter_as_sequence,         /*tp_as_sequence*/
-                                       0,                                 /*tp_as_mapping*/
-                                       (hashfunc)0,                       /*tp_hash*/
-                                       (ternaryfunc)0,                    /*tp_call*/
-                                       (reprfunc)0,                       /*tp_str*/
-
-                                       /* Space for future expansion */
-                                       0L,0L,0L,0L,
-                                       SplitterType__doc__ /* Documentation string */
-                                   };
-
-static char *splitter_args[]={"doc","synstop","encoding",NULL};
+    PyObject_HEAD_INIT(NULL)
+    0,                                 /*ob_size*/
+    "Splitter",                    /*tp_name*/
+    sizeof(Splitter),              /*tp_basicsize*/
+    0,                                 /*tp_itemsize*/
+    /* methods */
+    (destructor)Splitter_dealloc,  /*tp_dealloc*/
+    (printfunc)0,                      /*tp_print*/
+    (getattrfunc)Splitter_getattr, /*tp_getattr*/
+    (setattrfunc)0,                    /*tp_setattr*/
+    (cmpfunc)0,                        /*tp_compare*/
+    (reprfunc)0,                       /*tp_repr*/
+    0,                                 /*tp_as_number*/
+    &Splitter_as_sequence,         /*tp_as_sequence*/
+    0,                                 /*tp_as_mapping*/
+    (hashfunc)0,                       /*tp_hash*/
+    (ternaryfunc)0,                    /*tp_call*/
+    (reprfunc)0,                       /*tp_str*/
+
+    /* Space for future expansion */
+    0L,0L,0L,0L,
+    SplitterType__doc__ /* Documentation string */
+};
+
+static char *splitter_args[]={"doc","synstop","encoding","singlechar","indexnumbers","maxlen",NULL};


 static PyObject *
@@ -411,8 +444,28 @@ get_Splitter(PyObject *modinfo, PyObject *args,PyObject * keywds)
    Splitter *self;
    PyObject *doc, *synstop = NULL;
    char *encoding = "latin1";
+    int single_char = 0;
+    int index_numbers = 0;
+    int max_len= 64;
+
+    UNLESS(PyArg_ParseTupleAndKeywords(args,keywds,"O|Osiii",splitter_args, \
+                                       &doc,&synstop,&encoding,&single_char,&index_numbers,&max_len)) return NULL;
+

-    UNLESS(PyArg_ParseTupleAndKeywords(args,keywds,"O|Os",splitter_args, &doc,&synstop,&encoding)) return NULL;
+    if (index_numbers<0 || index_numbers>1) {
+        PyErr_SetString(PyExc_ValueError,"indexnumbers must be 0 or 1");
+        return NULL;
+    }
+
+    if (single_char<0 || single_char>1) {
+        PyErr_SetString(PyExc_ValueError,"singlechar must be 0 or 1");
+        return NULL;
+    }
+
+    if (max_len<1 || max_len>128) {
+        PyErr_SetString(PyExc_ValueError,"maxlen must be between 1 and 128");
+        return NULL;
+    }

    UNLESS(self = PyObject_NEW(Splitter, &SplitterType)) return NULL;

@@ -430,6 +483,9 @@ get_Splitter(PyObject *modinfo, PyObject *args,PyObject * keywds)
    self->end = self->here + PyString_Size(self->text);

    self->index = -1;
+    self->allow_single_chars = single_char;
+    self->index_numbers      = index_numbers;
+    self->max_len            = max_len;

    return (PyObject*)self;

@@ -442,7 +498,7 @@ err:
 static struct PyMethodDef Splitter_module_methods[] =
    {
        { "ZopeSplitter", (PyCFunction)get_Splitter, METH_VARARGS|METH_KEYWORDS,
-            "ZopeSplitter(doc[,synstop]) -- Return a word splitter"
+            "ZopeSplitter(doc[,synstop][,encoding][,singlechar][,indexnumbers][,maxlen]) -- Return a word splitter"
        },

        { NULL, NULL }
@@ -453,7 +509,7 @@ static char Splitter_module_documentation[] =
    "\n"
    "for use in an inverted index\n"
    "\n"
-    "$Id: ZopeSplitter.c,v 1.5 2001/11/28 15:51:04 matt Exp $\n"
+    "$Id: ZopeSplitter.c,v 1.6 2002/01/09 15:17:34 andreasjung Exp $\n"
    ;


@@ -461,7 +517,7 @@ void
 initZopeSplitter(void)
 {
    PyObject *m, *d;
-    char *rev="$Revision: 1.5 $";
+    char *rev="$Revision: 1.6 $";

    /* Create the module and add the functions */
    m = Py_InitModule4("ZopeSplitter", Splitter_module_methods,