Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Z
Zope
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
Zope
Commits
fc78f968
Commit
fc78f968
authored
Oct 31, 2005
by
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
- converted ILexicon to z3 and bridged it back
- ZCTextIndex now accepts lexicons with the z3 interface
parent
6a3b2b6c
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
124 additions
and
78 deletions
+124
-78
lib/python/Products/ZCTextIndex/ILexicon.py
lib/python/Products/ZCTextIndex/ILexicon.py
+11
-58
lib/python/Products/ZCTextIndex/Lexicon.py
lib/python/Products/ZCTextIndex/Lexicon.py
+11
-3
lib/python/Products/ZCTextIndex/ZCTextIndex.py
lib/python/Products/ZCTextIndex/ZCTextIndex.py
+9
-6
lib/python/Products/ZCTextIndex/interfaces.py
lib/python/Products/ZCTextIndex/interfaces.py
+64
-0
lib/python/Products/ZCTextIndex/tests/testLexicon.py
lib/python/Products/ZCTextIndex/tests/testLexicon.py
+27
-8
lib/python/Products/ZCTextIndex/tests/testZCTextIndex.py
lib/python/Products/ZCTextIndex/tests/testZCTextIndex.py
+2
-3
No files found.
lib/python/Products/ZCTextIndex/ILexicon.py
View file @
fc78f968
...
...
@@ -8,68 +8,21 @@
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
# FOR A PARTICULAR PURPOSE
.
#
##############################################################################
"""Lexicon z2 interfaces.
from
Interface
import
Interface
$Id$
"""
class
ILexicon
(
Interface
):
"""Object responsible for converting text to word identifiers."""
def
termToWordIds
(
text
):
"""Return a sequence of ids of the words parsed from the text.
# create ILexicon
from
Interface.bridge
import
createZope3Bridge
from
interfaces
import
ILexicon
as
z3ILexicon
import
ILexicon
The input text may be either a string or a list of strings.
createZope3Bridge
(
z3ILexicon
,
ILexicon
,
'ILexicon'
)
Parse the text as if they are search terms, and skips words
that aren't in the lexicon.
"""
def
sourceToWordIds
(
text
):
"""Return a sequence of ids of the words parsed from the text.
The input text may be either a string or a list of strings.
Parse the text as if they come from a source document, and
creates new word ids for words that aren't (yet) in the
lexicon.
"""
def
globToWordIds
(
pattern
):
"""Return a sequence of ids of words matching the pattern.
The argument should be a single word using globbing syntax,
e.g. 'foo*' meaning anything starting with 'foo'.
Return the wids for all words in the lexicon that match the
pattern.
"""
def
length
():
"""Return the number of unique terms in the lexicon."""
def
get_word
(
wid
):
"""Return the word for the given word id.
Raise KeyError if the word id is not in the lexicon.
"""
def
get_wid
(
word
):
"""Return the word id for the given word.
Return 0 if the word is not in the lexicon.
"""
def
parseTerms
(
text
):
"""Pass the text through the pipeline.
Return a list of words, normalized by the pipeline
(e.g. stopwords removed, case normalized etc.).
"""
def
isGlob
(
word
):
"""Return true if the word is a globbing pattern.
The word should be one of the words returned by parseTerms().
"""
del
createZope3Bridge
del
z3ILexicon
lib/python/Products/ZCTextIndex/Lexicon.py
View file @
fc78f968
...
...
@@ -8,9 +8,13 @@
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
# FOR A PARTICULAR PURPOSE
.
#
##############################################################################
"""Lexicon.
$Id$
"""
import
re
...
...
@@ -20,15 +24,19 @@ from BTrees.Length import Length
import
ZODB
from
Persistence
import
Persistent
from
zope.interface
import
implements
from
Products.ZCTextIndex.ILexicon
import
ILexicon
from
Products.ZCTextIndex.StopDict
import
get_stopdict
from
Products.ZCTextIndex.ParseTree
import
QueryError
from
Products.ZCTextIndex.PipelineFactory
import
element_factory
from
ILexicon
import
ILexicon
as
z2ILexicon
from
interfaces
import
ILexicon
class
Lexicon
(
Persistent
):
__implements__
=
ILexicon
__implements__
=
z2ILexicon
implements
(
ILexicon
)
def
__init__
(
self
,
*
pipeline
):
self
.
_wids
=
OIBTree
()
# word -> wid
...
...
lib/python/Products/ZCTextIndex/ZCTextIndex.py
View file @
fc78f968
...
...
@@ -33,17 +33,18 @@ from Products.PluginIndexes.common.util import parseIndexRequest
from
Products.PluginIndexes.common
import
safe_callable
from
Products.PluginIndexes.interfaces
import
IPluggableIndex
from
Products.ZCTextIndex.ILexicon
import
ILexicon
from
Products.ZCTextIndex.Lexicon
import
\
Lexicon
,
Splitter
,
CaseNormalizer
,
StopWordRemover
from
Products.ZCTextIndex.NBest
import
NBest
from
Products.ZCTextIndex.QueryParser
import
QueryParser
from
PipelineFactory
import
element_factory
from
CosineIndex
import
CosineIndex
from
ILexicon
import
ILexicon
as
z2ILexicon
from
interfaces
import
ILexicon
from
interfaces
import
IZCLexicon
from
interfaces
import
IZCTextIndex
from
OkapiIndex
import
OkapiIndex
from
PipelineFactory
import
element_factory
from
Products.ZCTextIndex.CosineIndex
import
CosineIndex
from
Products.ZCTextIndex.OkapiIndex
import
OkapiIndex
index_types
=
{
'Okapi BM25 Rank'
:
OkapiIndex
,
'Cosine Measure'
:
CosineIndex
}
...
...
@@ -89,7 +90,8 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
if
lexicon
is
None
:
raise
LookupError
,
'Lexicon "%s" not found'
%
escape
(
lexicon_id
)
if
not
ILexicon
.
isImplementedBy
(
lexicon
):
if
not
(
ILexicon
.
providedBy
(
lexicon
)
or
z2ILexicon
.
isImplementedBy
(
lexicon
)):
raise
ValueError
(
'Object "%s" does not implement '
'ZCTextIndex Lexicon interface'
%
lexicon
.
getId
())
...
...
@@ -134,7 +136,8 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
return
self
.
_v_lexicon
except
AttributeError
:
lexicon
=
getattr
(
aq_parent
(
aq_inner
(
self
)),
self
.
lexicon_id
)
if
not
ILexicon
.
isImplementedBy
(
lexicon
):
if
not
(
ILexicon
.
providedBy
(
lexicon
)
or
z2ILexicon
.
isImplementedBy
(
lexicon
)):
raise
TypeError
(
'Object "%s" is not a ZCTextIndex Lexicon'
%
repr
(
lexicon
))
self
.
_v_lexicon
=
lexicon
...
...
lib/python/Products/ZCTextIndex/interfaces.py
View file @
fc78f968
...
...
@@ -24,6 +24,70 @@ class IZCTextIndex(Interface):
"""
class
ILexicon
(
Interface
):
"""Object responsible for converting text to word identifiers.
"""
def
termToWordIds
(
text
):
"""Return a sequence of ids of the words parsed from the text.
The input text may be either a string or a list of strings.
Parse the text as if they are search terms, and skips words
that aren't in the lexicon.
"""
def
sourceToWordIds
(
text
):
"""Return a sequence of ids of the words parsed from the text.
The input text may be either a string or a list of strings.
Parse the text as if they come from a source document, and
creates new word ids for words that aren't (yet) in the
lexicon.
"""
def
globToWordIds
(
pattern
):
"""Return a sequence of ids of words matching the pattern.
The argument should be a single word using globbing syntax,
e.g. 'foo*' meaning anything starting with 'foo'.
Return the wids for all words in the lexicon that match the
pattern.
"""
def
length
():
"""Return the number of unique terms in the lexicon.
"""
def
get_word
(
wid
):
"""Return the word for the given word id.
Raise KeyError if the word id is not in the lexicon.
"""
def
get_wid
(
word
):
"""Return the word id for the given word.
Return 0 if the word is not in the lexicon.
"""
def
parseTerms
(
text
):
"""Pass the text through the pipeline.
Return a list of words, normalized by the pipeline
(e.g. stopwords removed, case normalized etc.).
"""
def
isGlob
(
word
):
"""Return true if the word is a globbing pattern.
The word should be one of the words returned by parseTerms().
"""
class
IZCLexicon
(
Interface
):
"""Lexicon for ZCTextIndex.
...
...
lib/python/Products/ZCTextIndex/tests/testLexicon.py
View file @
fc78f968
...
...
@@ -8,12 +8,17 @@
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
# FOR A PARTICULAR PURPOSE
.
#
##############################################################################
"""Lexicon unit tests.
$Id$
"""
import
unittest
import
os
,
sys
from
unittest
import
TestCase
,
TestSuite
,
main
,
makeSuite
import
ZODB
import
transaction
...
...
@@ -64,7 +69,20 @@ class StopWordPipelineElement:
return
res
class
Test
(
TestCase
):
class
Test
(
unittest
.
TestCase
):
def
test_z2interfaces
(
self
):
from
Interface.Verify
import
verifyClass
from
Products.ZCTextIndex.ILexicon
import
ILexicon
verifyClass
(
ILexicon
,
Lexicon
)
def
test_z3interfaces
(
self
):
from
Products.ZCTextIndex.interfaces
import
ILexicon
from
zope.interface.verify
import
verifyClass
verifyClass
(
ILexicon
,
Lexicon
)
def
testSourceToWordIds
(
self
):
lexicon
=
Lexicon
(
Splitter
())
wids
=
lexicon
.
sourceToWordIds
(
'cats and dogs'
)
...
...
@@ -145,7 +163,7 @@ class Test(TestCase):
lexicon
.
sourceToWordIds
(
'how now brown cow'
)
self
.
assert_
(
lexicon
.
length
.
__class__
is
Length
)
class
TestLexiconConflict
(
TestCase
):
class
TestLexiconConflict
(
unittest
.
TestCase
):
db
=
None
...
...
@@ -186,11 +204,12 @@ class TestLexiconConflict(TestCase):
self
.
assertEqual
(
copy
.
length
(),
11
)
self
.
assertEqual
(
copy
.
length
(),
len
(
copy
.
_words
))
def
test_suite
():
suite
=
TestSuite
()
suite
.
addTest
(
makeSuite
(
Test
))
suite
.
addTest
(
makeSuite
(
TestLexiconConflict
))
suite
=
unittest
.
TestSuite
()
suite
.
addTest
(
unittest
.
makeSuite
(
Test
))
suite
.
addTest
(
unittest
.
makeSuite
(
TestLexiconConflict
))
return
suite
if
__name__
==
'__main__'
:
main
(
defaultTest
=
'test_suite'
)
unittest
.
main
(
defaultTest
=
'test_suite'
)
lib/python/Products/ZCTextIndex/tests/testZCTextIndex.py
View file @
fc78f968
...
...
@@ -17,9 +17,6 @@ $Id$
"""
import
unittest
import
Testing
import
Zope2
Zope2
.
startup
()
import
re
...
...
@@ -577,9 +574,11 @@ class OkapiQueryTests(QueryTestsBase):
class PLexiconTests(unittest.TestCase):
def test_z3interfaces(self):
from Products.ZCTextIndex.interfaces import ILexicon
from Products.ZCTextIndex.interfaces import IZCLexicon
from zope.interface.verify import verifyClass
verifyClass(ILexicon, PLexicon)
verifyClass(IZCLexicon, PLexicon)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment