Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Z
Zope
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
Zope
Commits
17f69863
Commit
17f69863
authored
Jan 09, 2002
by
Andreas Jung
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added 3 new parameters for all zope splitters
parent
fc443b19
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
245 additions
and
83 deletions
+245
-83
doc/CHANGES.txt
doc/CHANGES.txt
+13
-0
lib/python/Products/PluginIndexes/TextIndex/Splitter/ISO_8859_1_Splitter/src/ISO_8859_1_Splitter.c
...ex/Splitter/ISO_8859_1_Splitter/src/ISO_8859_1_Splitter.c
+69
-14
lib/python/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/src/UnicodeSplitter.c
.../TextIndex/Splitter/UnicodeSplitter/src/UnicodeSplitter.c
+59
-21
lib/python/Products/PluginIndexes/TextIndex/Splitter/ZopeSplitter/src/ZopeSplitter.c
...ndexes/TextIndex/Splitter/ZopeSplitter/src/ZopeSplitter.c
+104
-48
No files found.
doc/CHANGES.txt
View file @
17f69863
...
...
@@ -27,6 +27,19 @@ Zope Changes
Features Added
- TextIndex/Splitters: the constructors of all three splitters
now accept three new optional parameters:
'maxlen'=(1-128) - to specify the maximum length of
split words
'singlechar'=(1|0) - allows single characters to be indexed
'indexnumbers'=(1|0)- allows numbers to be indexed
The default values of all parameters reflect the standard
behaviour.
- Enhancements to utilities/requestprofiler.py:
Added readstats and writestats features which allow for saves and
...
...
lib/python/Products/PluginIndexes/TextIndex/Splitter/ISO_8859_1_Splitter/src/ISO_8859_1_Splitter.c
View file @
17f69863
...
...
@@ -32,6 +32,9 @@ typedef struct
PyObject
*
text
,
*
synstop
;
char
*
here
,
*
end
;
int
index
;
int
allow_single_chars
;
int
index_numbers
;
int
max_len
;
}
Splitter
;
...
...
@@ -117,6 +120,32 @@ Splitter_length(Splitter *self)
return
self
->
index
+
1
;
}
/*
 * split() -- split the complete text in one run.
 *
 * Resets the splitter, then calls next_word() repeatedly and appends every
 * word it yields to a freshly created list.  Iteration stops as soon as
 * next_word() returns Py_None.
 *
 * Returns a new reference to the list of words, or NULL (with the list
 * released) if the list cannot be created, next_word() fails, or appending
 * to the list fails.
 */
static PyObject *
Splitter_split(Splitter *self)
{
    PyObject *list;
    PyObject *word;

    list = PyList_New(0);
    if (list == NULL)
        return NULL;

    Splitter_reset(self);

    for (;;) {
        word = next_word(self, NULL, NULL);
        if (word == NULL)
            goto err;

        /* Each result of next_word() is treated as an owned reference,
           so the terminating Py_None has to be released as well. */
        if (word == Py_None) {
            Py_DECREF(word);
            return list;
        }

        /* PyList_Append() takes its own reference to the item; ours is
           dropped afterwards whether or not the append succeeded. */
        if (PyList_Append(list, word) < 0) {
            Py_DECREF(word);
            goto err;
        }
        Py_DECREF(word);
    }

err:
    /* Do not leak the partially filled list on failure. */
    Py_DECREF(list);
    return NULL;
}
static
PyObject
*
Splitter_concat
(
Splitter
*
self
,
PyObject
*
other
)
{
...
...
@@ -155,7 +184,7 @@ check_synstop(Splitter *self, PyObject *word)
len
=
PyString_Size
(
word
);
if
(
len
<
2
)
/* Single-letter words are stop words! */
if
(
len
<
2
&&
!
self
->
allow_single_chars
)
/* Single-letter words are stop words! */
{
Py_INCREF
(
Py_None
);
return
Py_None
;
...
...
@@ -167,7 +196,7 @@ check_synstop(Splitter *self, PyObject *word)
for
(;
--
len
>=
0
&&
!
isalpha
((
unsigned
char
)
cword
[
len
]);
)
;
if
(
len
<
0
)
{
if
(
len
<
0
&&
!
self
->
index_numbers
)
{
Py_INCREF
(
Py_None
);
return
Py_None
;
}
...
...
@@ -197,12 +226,11 @@ check_synstop(Splitter *self, PyObject *word)
return
value
;
/* Which must be None! */
}
#define MAX_WORD 64
/* Words longer than MAX_WORD are stemmed */
static
PyObject
*
next_word
(
Splitter
*
self
,
char
**
startpos
,
char
**
endpos
)
{
char
wbuf
[
MAX_WORD
];
char
wbuf
[
256
];
char
*
end
,
*
here
,
*
b
;
int
i
=
0
,
c
;
PyObject
*
pyword
,
*
res
;
...
...
@@ -232,13 +260,13 @@ next_word(Splitter *self, char **startpos, char **endpos)
if
(
startpos
&&
i
==
0
)
*
startpos
=
here
;
if
(
i
++
<
MAX_WORD
)
if
(
i
++
<
self
->
max_len
)
*
b
++
=
c
;
}
else
if
(
i
!=
0
)
{
/* We've found the end of a word */
if
(
i
>=
MAX_WORD
)
i
=
MAX_WORD
;
/* "stem" the long word */
if
(
i
>=
self
->
max_len
)
i
=
self
->
max_len
;
/* "stem" the long word */
UNLESS
(
pyword
=
PyString_FromStringAndSize
(
wbuf
,
i
))
{
self
->
here
=
here
;
...
...
@@ -282,8 +310,8 @@ next_word(Splitter *self, char **startpos, char **endpos)
/* We've reached the end of the string */
if
(
i
>=
MAX_WORD
)
i
=
MAX_WORD
;
/* "stem" the long word */
if
(
i
>=
self
->
max_len
)
i
=
self
->
max_len
;
/* "stem" the long word */
if
(
i
==
0
)
{
/* No words */
...
...
@@ -416,6 +444,9 @@ err:
static
struct
PyMethodDef
Splitter_methods
[]
=
{
{
"split"
,
(
PyCFunction
)
Splitter_split
,
0
,
"split() -- Split the string in one run"
},
{
"pos"
,
(
PyCFunction
)
Splitter_pos
,
0
,
"pos(index) -- Return the starting and ending position of a token"
},
...
...
@@ -459,7 +490,7 @@ static PyTypeObject SplitterType = {
SplitterType__doc__
/* Documentation string */
};
static
char
*
splitter_args
[]
=
{
"doc"
,
"synstop"
,
"encoding"
,
NULL
};
static
char
*
splitter_args
[]
=
{
"doc"
,
"synstop"
,
"encoding"
,
"singlechar"
,
"indexnumbers"
,
"maxlen"
,
NULL
};
static
PyObject
*
get_Splitter
(
PyObject
*
modinfo
,
PyObject
*
args
,
PyObject
*
keywds
)
...
...
@@ -467,8 +498,29 @@ get_Splitter(PyObject *modinfo, PyObject *args,PyObject *keywds)
Splitter
*
self
;
PyObject
*
doc
,
*
synstop
=
NULL
;
char
*
encoding
=
"latin1"
;
int
single_char
=
0
;
int
index_numbers
=
0
;
int
max_len
=
64
;
UNLESS
(
PyArg_ParseTupleAndKeywords
(
args
,
keywds
,
"O|Osiii"
,
splitter_args
,
&
doc
,
&
synstop
,
&
encoding
,
&
single_char
,
&
index_numbers
,
&
max_len
))
return
NULL
;
if
(
index_numbers
<
0
||
index_numbers
>
1
)
{
PyErr_SetString
(
PyExc_ValueError
,
"indexnumbers must be 0 or 1"
);
return
NULL
;
}
if
(
single_char
<
0
||
single_char
>
1
)
{
PyErr_SetString
(
PyExc_ValueError
,
"singlechar must be 0 or 1"
);
return
NULL
;
}
if
(
max_len
<
1
||
max_len
>
128
)
{
PyErr_SetString
(
PyExc_ValueError
,
"maxlen must be between 1 and 128"
);
return
NULL
;
}
UNLESS
(
PyArg_ParseTupleAndKeywords
(
args
,
keywds
,
"O|Os"
,
splitter_args
,
&
doc
,
&
synstop
,
&
encoding
))
return
NULL
;
UNLESS
(
self
=
PyObject_NEW
(
Splitter
,
&
SplitterType
))
return
NULL
;
...
...
@@ -484,6 +536,9 @@ get_Splitter(PyObject *modinfo, PyObject *args,PyObject *keywds)
UNLESS
(
self
->
here
=
PyString_AsString
(
self
->
text
))
goto
err
;
self
->
end
=
self
->
here
+
PyString_Size
(
self
->
text
);
self
->
allow_single_chars
=
single_char
;
self
->
index_numbers
=
index_numbers
;
self
->
max_len
=
max_len
;
self
->
index
=
-
1
;
...
...
@@ -498,7 +553,7 @@ err:
static
struct
PyMethodDef
Splitter_module_methods
[]
=
{
{
"ISO_8859_1_Splitter"
,
(
PyCFunction
)
get_Splitter
,
METH_VARARGS
|
METH_KEYWORDS
,
"ISO_8859_1_Splitter(doc[,synstop
]) -- Return a word splitter"
"ISO_8859_1_Splitter(doc[,synstop][,encoding][,singlechar][,indexnumbers][,maxlen
]) -- Return a word splitter"
},
{
NULL
,
NULL
}
...
...
@@ -509,7 +564,7 @@ static char Splitter_module_documentation[] =
"
\n
"
"for use in an inverted index
\n
"
"
\n
"
"$Id: ISO_8859_1_Splitter.c,v 1.
5 2001/11/28 15:51:04 matt
Exp $
\n
"
"$Id: ISO_8859_1_Splitter.c,v 1.
6 2002/01/09 15:17:34 andreasjung
Exp $
\n
"
;
...
...
@@ -518,7 +573,7 @@ void
initISO_8859_1_Splitter
(
void
)
{
PyObject
*
m
,
*
d
;
char
*
rev
=
"$Revision: 1.
5
$"
;
char
*
rev
=
"$Revision: 1.
6
$"
;
/* Create the module and add the functions */
initSplitterTrtabs
();
...
...
lib/python/Products/PluginIndexes/TextIndex/Splitter/UnicodeSplitter/src/UnicodeSplitter.c
View file @
17f69863
...
...
@@ -13,8 +13,6 @@
#include "Python.h"
#define MAX_WORD 64
/* Words longer than MAX_WORD are stemmed */
#ifndef min
#define min(a,b) ((a)<(b)?(a):(b))
#endif
...
...
@@ -24,8 +22,12 @@ typedef struct
PyObject_HEAD
PyObject
*
list
;
PyObject
*
synstop
;
int
max_len
;
int
allow_single_chars
;
int
index_numbers
;
}
Splitter
;
static
PyUnicodeObject
*
prepareString
(
PyUnicodeObject
*
o
);
...
...
@@ -34,6 +36,9 @@ static PyObject *checkSynword(Splitter *self, PyObject *word)
/* Always returns a borrowed reference */
PyObject
*
value
;
if
(
PyUnicode_GetSize
(
word
)
==
1
&&
!
self
->
allow_single_chars
)
return
Py_None
;
if
(
self
->
synstop
)
{
value
=
PyDict_GetItem
(
self
->
synstop
,
word
);
if
(
value
!=
NULL
)
{
...
...
@@ -82,6 +87,14 @@ Splitter_item(Splitter *self, int i)
return
item
;
}
/*
 * split() -- hand out the word list stored on the splitter.
 *
 * Returns a new reference to self->list; the list object itself is shared
 * with the splitter, it is not copied.
 */
static PyObject *
Splitter_split(Splitter *self)
{
    PyObject *words = self->list;

    Py_INCREF(words);
    return words;
}
static
PyObject
*
Splitter_indexes
(
Splitter
*
self
,
PyObject
*
args
)
...
...
@@ -133,6 +146,8 @@ Splitter_pos(Splitter *self, PyObject *args)
static
struct
PyMethodDef
Splitter_methods
[]
=
{
{
"split"
,
(
PyCFunction
)
Splitter_split
,
0
,
"split() -- Split string in one run"
},
{
"indexes"
,
(
PyCFunction
)
Splitter_indexes
,
METH_VARARGS
,
"indexes(word) -- Return a list of the indexes of word in the sequence"
,
},
...
...
@@ -198,14 +213,19 @@ static int splitUnicodeString(Splitter *self,PyUnicodeObject *doc)
register
Py_UNICODE
ch
;
ch
=
*
s
;
#ifdef DEBUG
printf
(
"%d %c %d
\n
"
,
i
,
ch
,
ch
);
fflush
(
stdout
);
#endif
if
(
!
inside_word
)
{
if
(
Py_UNICODE_ISALPHA
(
ch
))
{
inside_word
=
1
;
start
=
i
;
if
(
self
->
index_numbers
)
{
if
(
Py_UNICODE_ISALNUM
(
ch
))
{
inside_word
=
1
;
start
=
i
;
}
}
else
{
if
(
Py_UNICODE_ISALPHA
(
ch
))
{
inside_word
=
1
;
start
=
i
;
}
}
}
else
{
...
...
@@ -213,7 +233,7 @@ static int splitUnicodeString(Splitter *self,PyUnicodeObject *doc)
inside_word
=
0
;
word
=
PySequence_GetSlice
((
PyObject
*
)
doc1
,
start
,
min
(
i
,
start
+
MAX_WORD
));
min
(
i
,
start
+
self
->
max_len
));
if
(
word
==
NULL
)
goto
err
;
...
...
@@ -234,7 +254,7 @@ static int splitUnicodeString(Splitter *self,PyUnicodeObject *doc)
if
(
inside_word
)
{
word
=
PySequence_GetSlice
((
PyObject
*
)
doc1
,
start
,
min
(
len
,
start
+
MAX_WORD
));
min
(
len
,
start
+
self
->
max_len
));
if
(
word
==
NULL
)
goto
err
;
...
...
@@ -288,7 +308,7 @@ PyUnicodeObject *prepareString(PyUnicodeObject *o)
return
u
;
}
static
char
*
splitter_args
[]
=
{
"doc"
,
"synstop"
,
"encoding"
,
NULL
};
static
char
*
splitter_args
[]
=
{
"doc"
,
"synstop"
,
"encoding"
,
"indexnumbers"
,
"singlechar"
,
"maxlen"
,
NULL
};
static
PyObject
*
...
...
@@ -297,9 +317,11 @@ newSplitter(PyObject *modinfo, PyObject *args,PyObject *keywds)
Splitter
*
self
=
NULL
;
PyObject
*
doc
=
NULL
,
*
unicodedoc
=
NULL
,
*
synstop
=
NULL
;
char
*
encoding
=
"latin1"
;
int
index_numbers
=
0
;
int
max_len
=
64
;
int
single_char
=
0
;
if
(
!
(
self
=
PyObject_NEW
(
Splitter
,
&
SplitterType
)))
return
NULL
;
if
(
!
(
PyArg_ParseTupleAndKeywords
(
args
,
keywds
,
"O|Os"
,
splitter_args
,
&
doc
,
&
synstop
,
&
encoding
)))
return
NULL
;
if
(
!
(
PyArg_ParseTupleAndKeywords
(
args
,
keywds
,
"O|Osiii"
,
splitter_args
,
&
doc
,
&
synstop
,
&
encoding
,
&
index_numbers
,
&
single_char
,
&
max_len
)))
return
NULL
;
#ifdef DEBUG
puts
(
"got text"
);
...
...
@@ -307,6 +329,21 @@ newSplitter(PyObject *modinfo, PyObject *args,PyObject *keywds)
fflush
(
stdout
);
#endif
if
(
index_numbers
<
0
||
index_numbers
>
1
)
{
PyErr_SetString
(
PyExc_ValueError
,
"indexnumbers must be 0 or 1"
);
return
NULL
;
}
if
(
single_char
<
0
||
single_char
>
1
)
{
PyErr_SetString
(
PyExc_ValueError
,
"singlechar must be 0 or 1"
);
return
NULL
;
}
if
(
max_len
<
1
||
max_len
>
128
)
{
PyErr_SetString
(
PyExc_ValueError
,
"maxlen must be between 1 and 128"
);
return
NULL
;
}
if
(
PyString_Check
(
doc
))
{
unicodedoc
=
PyUnicode_FromEncodedObject
(
doc
,
encoding
,
"strict"
);
...
...
@@ -324,11 +361,17 @@ newSplitter(PyObject *modinfo, PyObject *args,PyObject *keywds)
return
NULL
;
}
if
(
!
(
self
=
PyObject_NEW
(
Splitter
,
&
SplitterType
)))
return
NULL
;
if
(
synstop
)
{
self
->
synstop
=
synstop
;
Py_INCREF
(
synstop
);
}
else
self
->
synstop
=
NULL
;
self
->
index_numbers
=
index_numbers
;
self
->
max_len
=
max_len
;
self
->
allow_single_chars
=
single_char
;
if
((
splitUnicodeString
(
self
,(
PyUnicodeObject
*
)
unicodedoc
))
<
0
)
goto
err
;
...
...
@@ -344,11 +387,6 @@ err:
static
struct
PyMethodDef
Splitter_module_methods
[]
=
{
{
"pos"
,
(
PyCFunction
)
Splitter_pos
,
0
,
"pos(index) -- Return the starting and ending position of a token"
},
{
"indexes"
,
(
PyCFunction
)
Splitter_indexes
,
METH_VARARGS
,
"indexes(word) -- Return a list of the indexes of word in sequence"
},
{
"UnicodeSplitter"
,
(
PyCFunction
)
newSplitter
,
METH_VARARGS
|
METH_KEYWORDS
,
"UnicodeSplitter(doc[,synstop][,encoding='latin1']) "
...
...
@@ -362,7 +400,7 @@ static char Splitter_module_documentation[] =
"
\n
"
"for use in an inverted index
\n
"
"
\n
"
"$Id: UnicodeSplitter.c,v 1.1
2 2001/11/28 15:51:04 matt
Exp $
\n
"
"$Id: UnicodeSplitter.c,v 1.1
3 2002/01/09 15:17:34 andreasjung
Exp $
\n
"
;
...
...
@@ -370,7 +408,7 @@ void
initUnicodeSplitter
(
void
)
{
PyObject
*
m
,
*
d
;
char
*
rev
=
"$Revision: 1.1
2
$"
;
char
*
rev
=
"$Revision: 1.1
3
$"
;
/* Create the module and add the functions */
m
=
Py_InitModule4
(
"UnicodeSplitter"
,
Splitter_module_methods
,
...
...
lib/python/Products/PluginIndexes/TextIndex/Splitter/ZopeSplitter/src/ZopeSplitter.c
View file @
17f69863
/*****************************************************************************
Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
This software is subject to the provisions of the Zope Public License,
...
...
@@ -10,6 +10,8 @@
FOR A PARTICULAR PURPOSE
****************************************************************************/
#include "Python.h"
#include <ctype.h>
...
...
@@ -23,6 +25,9 @@ typedef struct
PyObject
*
text
,
*
synstop
;
char
*
here
,
*
end
;
int
index
;
int
allow_single_chars
;
int
index_numbers
;
int
max_len
;
}
Splitter
;
...
...
@@ -98,7 +103,7 @@ check_synstop(Splitter *self, PyObject *word)
cword
=
PyString_AsString
(
word
);
len
=
PyString_Size
(
word
);
if
(
len
<
2
)
/* Single-letter words are stop words! */
if
(
len
<
2
&&
!
self
->
allow_single_chars
)
/* Single-letter words are stop words! */
{
Py_INCREF
(
Py_None
);
return
Py_None
;
...
...
@@ -110,7 +115,7 @@ check_synstop(Splitter *self, PyObject *word)
for
(;
--
len
>=
0
&&
!
isalpha
((
unsigned
char
)
cword
[
len
]);
)
;
if
(
len
<
0
)
{
if
(
len
<
0
&&
!
self
->
index_numbers
)
{
Py_INCREF
(
Py_None
);
return
Py_None
;
}
...
...
@@ -140,12 +145,11 @@ check_synstop(Splitter *self, PyObject *word)
return
value
;
/* Which must be None! */
}
#define MAX_WORD 64
/* Words longer than MAX_WORD are stemmed */
static
PyObject
*
next_word
(
Splitter
*
self
,
char
**
startpos
,
char
**
endpos
)
{
char
wbuf
[
MAX_WORD
];
char
wbuf
[
256
];
char
*
end
,
*
here
,
*
b
;
int
i
=
0
,
c
;
PyObject
*
pyword
,
*
res
;
...
...
@@ -175,13 +179,13 @@ next_word(Splitter *self, char **startpos, char **endpos)
if
(
startpos
&&
i
==
0
)
*
startpos
=
here
;
if
(
i
++
<
MAX_WORD
)
if
(
i
++
<
self
->
max_len
)
*
b
++
=
c
;
}
else
if
(
i
!=
0
)
{
/* We've found the end of a word */
if
(
i
>=
MAX_WORD
)
i
=
MAX_WORD
;
/* "stem" the long word */
if
(
i
>=
self
->
max_len
)
i
=
self
->
max_len
;
/* "stem" the long word */
UNLESS
(
pyword
=
PyString_FromStringAndSize
(
wbuf
,
i
))
{
self
->
here
=
here
;
...
...
@@ -225,8 +229,8 @@ next_word(Splitter *self, char **startpos, char **endpos)
/* We've reached the end of the string */
if
(
i
>=
MAX_WORD
)
i
=
MAX_WORD
;
/* "stem" the long word */
if
(
i
>=
self
->
max_len
)
i
=
self
->
max_len
;
/* "stem" the long word */
if
(
i
==
0
)
{
/* No words */
...
...
@@ -274,6 +278,31 @@ Splitter_item(Splitter *self, int i)
return
word
;
}
/*
 * split() -- split the complete text in one run.
 *
 * Resets the splitter, then calls next_word() repeatedly and appends every
 * word it yields to a freshly created list.  Iteration stops as soon as
 * next_word() returns Py_None.
 *
 * Returns a new reference to the list of words, or NULL (with the list
 * released) if the list cannot be created, next_word() fails, or appending
 * to the list fails.
 */
static PyObject *
Splitter_split(Splitter *self)
{
    PyObject *list;
    PyObject *word;

    list = PyList_New(0);
    if (list == NULL)
        return NULL;

    Splitter_reset(self);

    for (;;) {
        word = next_word(self, NULL, NULL);
        if (word == NULL)
            goto err;

        /* Each result of next_word() is treated as an owned reference,
           so the terminating Py_None has to be released as well. */
        if (word == Py_None) {
            Py_DECREF(word);
            return list;
        }

        /* PyList_Append() takes its own reference to the item; ours is
           dropped afterwards whether or not the append succeeded. */
        if (PyList_Append(list, word) < 0) {
            Py_DECREF(word);
            goto err;
        }
        Py_DECREF(word);
    }

err:
    /* Do not leak the partially filled list on failure. */
    Py_DECREF(list);
    return NULL;
}
static
PyObject
*
Splitter_slice
(
Splitter
*
self
,
int
i
,
int
j
)
{
...
...
@@ -282,14 +311,14 @@ Splitter_slice(Splitter *self, int i, int j)
}
static
PySequenceMethods
Splitter_as_sequence
=
{
(
inquiry
)
Splitter_length
,
/*sq_length*/
(
binaryfunc
)
Splitter_concat
,
/*sq_concat*/
(
intargfunc
)
Splitter_repeat
,
/*sq_repeat*/
(
intargfunc
)
Splitter_item
,
/*sq_item*/
(
intintargfunc
)
Splitter_slice
,
/*sq_slice*/
(
intobjargproc
)
0
,
/*sq_ass_item*/
(
intintobjargproc
)
0
,
/*sq_ass_slice*/
};
(
inquiry
)
Splitter_length
,
/*sq_length*/
(
binaryfunc
)
Splitter_concat
,
/*sq_concat*/
(
intargfunc
)
Splitter_repeat
,
/*sq_repeat*/
(
intargfunc
)
Splitter_item
,
/*sq_item*/
(
intintargfunc
)
Splitter_slice
,
/*sq_slice*/
(
intobjargproc
)
0
,
/*sq_ass_item*/
(
intintobjargproc
)
0
,
/*sq_ass_slice*/
};
static
PyObject
*
Splitter_pos
(
Splitter
*
self
,
PyObject
*
args
)
...
...
@@ -359,8 +388,12 @@ err:
static
struct
PyMethodDef
Splitter_methods
[]
=
{
{
"split"
,
(
PyCFunction
)
Splitter_split
,
0
,
"split() -- Split complete string in one run"
},
{
"pos"
,
(
PyCFunction
)
Splitter_pos
,
0
,
"pos(index) -- Return the starting and ending position of a token"
"pos(index) -- Return the starting and ending position of a token"
},
{
"indexes"
,
(
PyCFunction
)
Splitter_indexes
,
METH_VARARGS
,
...
...
@@ -378,31 +411,31 @@ Splitter_getattr(Splitter *self, char *name)
static
char
SplitterType__doc__
[]
=
""
;
static
PyTypeObject
SplitterType
=
{
PyObject_HEAD_INIT
(
NULL
)
0
,
/*ob_size*/
"Splitter"
,
/*tp_name*/
sizeof
(
Splitter
),
/*tp_basicsize*/
0
,
/*tp_itemsize*/
/* methods */
(
destructor
)
Splitter_dealloc
,
/*tp_dealloc*/
(
printfunc
)
0
,
/*tp_print*/
(
getattrfunc
)
Splitter_getattr
,
/*tp_getattr*/
(
setattrfunc
)
0
,
/*tp_setattr*/
(
cmpfunc
)
0
,
/*tp_compare*/
(
reprfunc
)
0
,
/*tp_repr*/
0
,
/*tp_as_number*/
&
Splitter_as_sequence
,
/*tp_as_sequence*/
0
,
/*tp_as_mapping*/
(
hashfunc
)
0
,
/*tp_hash*/
(
ternaryfunc
)
0
,
/*tp_call*/
(
reprfunc
)
0
,
/*tp_str*/
/* Space for future expansion */
0L
,
0L
,
0L
,
0L
,
SplitterType__doc__
/* Documentation string */
};
static
char
*
splitter_args
[]
=
{
"doc"
,
"synstop"
,
"encoding"
,
NULL
};
PyObject_HEAD_INIT
(
NULL
)
0
,
/*ob_size*/
"Splitter"
,
/*tp_name*/
sizeof
(
Splitter
),
/*tp_basicsize*/
0
,
/*tp_itemsize*/
/* methods */
(
destructor
)
Splitter_dealloc
,
/*tp_dealloc*/
(
printfunc
)
0
,
/*tp_print*/
(
getattrfunc
)
Splitter_getattr
,
/*tp_getattr*/
(
setattrfunc
)
0
,
/*tp_setattr*/
(
cmpfunc
)
0
,
/*tp_compare*/
(
reprfunc
)
0
,
/*tp_repr*/
0
,
/*tp_as_number*/
&
Splitter_as_sequence
,
/*tp_as_sequence*/
0
,
/*tp_as_mapping*/
(
hashfunc
)
0
,
/*tp_hash*/
(
ternaryfunc
)
0
,
/*tp_call*/
(
reprfunc
)
0
,
/*tp_str*/
/* Space for future expansion */
0L
,
0L
,
0L
,
0L
,
SplitterType__doc__
/* Documentation string */
};
static
char
*
splitter_args
[]
=
{
"doc"
,
"synstop"
,
"encoding"
,
"singlechar"
,
"indexnumbers"
,
"maxlen"
,
NULL
};
static
PyObject
*
...
...
@@ -411,8 +444,28 @@ get_Splitter(PyObject *modinfo, PyObject *args,PyObject * keywds)
Splitter
*
self
;
PyObject
*
doc
,
*
synstop
=
NULL
;
char
*
encoding
=
"latin1"
;
int
single_char
=
0
;
int
index_numbers
=
0
;
int
max_len
=
64
;
UNLESS
(
PyArg_ParseTupleAndKeywords
(
args
,
keywds
,
"O|Osiii"
,
splitter_args
,
\
&
doc
,
&
synstop
,
&
encoding
,
&
single_char
,
&
index_numbers
,
&
max_len
))
return
NULL
;
UNLESS
(
PyArg_ParseTupleAndKeywords
(
args
,
keywds
,
"O|Os"
,
splitter_args
,
&
doc
,
&
synstop
,
&
encoding
))
return
NULL
;
if
(
index_numbers
<
0
||
index_numbers
>
1
)
{
PyErr_SetString
(
PyExc_ValueError
,
"indexnumbers must be 0 or 1"
);
return
NULL
;
}
if
(
single_char
<
0
||
single_char
>
1
)
{
PyErr_SetString
(
PyExc_ValueError
,
"singlechar must be 0 or 1"
);
return
NULL
;
}
if
(
max_len
<
1
||
max_len
>
128
)
{
PyErr_SetString
(
PyExc_ValueError
,
"maxlen must be between 1 and 128"
);
return
NULL
;
}
UNLESS
(
self
=
PyObject_NEW
(
Splitter
,
&
SplitterType
))
return
NULL
;
...
...
@@ -430,6 +483,9 @@ get_Splitter(PyObject *modinfo, PyObject *args,PyObject * keywds)
self
->
end
=
self
->
here
+
PyString_Size
(
self
->
text
);
self
->
index
=
-
1
;
self
->
allow_single_chars
=
single_char
;
self
->
index_numbers
=
index_numbers
;
self
->
max_len
=
max_len
;
return
(
PyObject
*
)
self
;
...
...
@@ -442,7 +498,7 @@ err:
static
struct
PyMethodDef
Splitter_module_methods
[]
=
{
{
"ZopeSplitter"
,
(
PyCFunction
)
get_Splitter
,
METH_VARARGS
|
METH_KEYWORDS
,
"ZopeSplitter(doc[,synstop]) -- Return a word splitter"
"ZopeSplitter(doc[,synstop]
[,encoding][,singlechar][,indexnumbers][,maxlen]
) -- Return a word splitter"
},
{
NULL
,
NULL
}
...
...
@@ -453,7 +509,7 @@ static char Splitter_module_documentation[] =
"
\n
"
"for use in an inverted index
\n
"
"
\n
"
"$Id: ZopeSplitter.c,v 1.
5 2001/11/28 15:51:04 matt
Exp $
\n
"
"$Id: ZopeSplitter.c,v 1.
6 2002/01/09 15:17:34 andreasjung
Exp $
\n
"
;
...
...
@@ -461,7 +517,7 @@ void
initZopeSplitter
(
void
)
{
PyObject
*
m
,
*
d
;
char
*
rev
=
"$Revision: 1.
5
$"
;
char
*
rev
=
"$Revision: 1.
6
$"
;
/* Create the module and add the functions */
m
=
Py_InitModule4
(
"ZopeSplitter"
,
Splitter_module_methods
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment