Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Z
Zope
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
Zope
Commits
a876b335
Commit
a876b335
authored
Oct 31, 2005
by
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
- converted ILexicon to z3 and bridged it back
- ZCTextIndex now accepts lexicons with the z3 interface
parent
6c6e563f
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
124 additions
and
78 deletions
+124
-78
lib/python/Products/ZCTextIndex/ILexicon.py
lib/python/Products/ZCTextIndex/ILexicon.py
+11
-58
lib/python/Products/ZCTextIndex/Lexicon.py
lib/python/Products/ZCTextIndex/Lexicon.py
+11
-3
lib/python/Products/ZCTextIndex/ZCTextIndex.py
lib/python/Products/ZCTextIndex/ZCTextIndex.py
+9
-6
lib/python/Products/ZCTextIndex/interfaces.py
lib/python/Products/ZCTextIndex/interfaces.py
+64
-0
lib/python/Products/ZCTextIndex/tests/testLexicon.py
lib/python/Products/ZCTextIndex/tests/testLexicon.py
+27
-8
lib/python/Products/ZCTextIndex/tests/testZCTextIndex.py
lib/python/Products/ZCTextIndex/tests/testZCTextIndex.py
+2
-3
No files found.
lib/python/Products/ZCTextIndex/ILexicon.py
View file @
a876b335
...
@@ -8,68 +8,21 @@
...
@@ -8,68 +8,21 @@
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
# FOR A PARTICULAR PURPOSE
.
#
#
##############################################################################
##############################################################################
"""Lexicon z2 interfaces.
from
Interface
import
Interface
$Id$
"""
class
ILexicon
(
Interface
):
"""Object responsible for converting text to word identifiers."""
def
termToWordIds
(
text
):
# create ILexicon
"""Return a sequence of ids of the words parsed from the text.
from
Interface.bridge
import
createZope3Bridge
from
interfaces
import
ILexicon
as
z3ILexicon
import
ILexicon
The input text may be either a string or a list of strings.
createZope3Bridge
(
z3ILexicon
,
ILexicon
,
'ILexicon'
)
Parse the text as if it consists of search terms, and skip words
del
createZope3Bridge
that aren't in the lexicon.
del
z3ILexicon
"""
def
sourceToWordIds
(
text
):
"""Return a sequence of ids of the words parsed from the text.
The input text may be either a string or a list of strings.
Parse the text as if it comes from a source document, and
create new word ids for words that aren't (yet) in the
lexicon.
"""
def
globToWordIds
(
pattern
):
"""Return a sequence of ids of words matching the pattern.
The argument should be a single word using globbing syntax,
e.g. 'foo*' meaning anything starting with 'foo'.
Return the wids for all words in the lexicon that match the
pattern.
"""
def
length
():
"""Return the number of unique terms in the lexicon."""
def
get_word
(
wid
):
"""Return the word for the given word id.
Raise KeyError if the word id is not in the lexicon.
"""
def
get_wid
(
word
):
"""Return the word id for the given word.
Return 0 if the word is not in the lexicon.
"""
def
parseTerms
(
text
):
"""Pass the text through the pipeline.
Return a list of words, normalized by the pipeline
(e.g. stopwords removed, case normalized etc.).
"""
def
isGlob
(
word
):
"""Return true if the word is a globbing pattern.
The word should be one of the words returned by parseTerms().
"""
lib/python/Products/ZCTextIndex/Lexicon.py
View file @
a876b335
...
@@ -8,9 +8,13 @@
...
@@ -8,9 +8,13 @@
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
# FOR A PARTICULAR PURPOSE
.
#
#
##############################################################################
##############################################################################
"""Lexicon.
$Id$
"""
import
re
import
re
...
@@ -20,15 +24,19 @@ from BTrees.Length import Length
...
@@ -20,15 +24,19 @@ from BTrees.Length import Length
import
ZODB
import
ZODB
from
Persistence
import
Persistent
from
Persistence
import
Persistent
from
zope.interface
import
implements
from
Products.ZCTextIndex.ILexicon
import
ILexicon
from
Products.ZCTextIndex.StopDict
import
get_stopdict
from
Products.ZCTextIndex.StopDict
import
get_stopdict
from
Products.ZCTextIndex.ParseTree
import
QueryError
from
Products.ZCTextIndex.ParseTree
import
QueryError
from
Products.ZCTextIndex.PipelineFactory
import
element_factory
from
Products.ZCTextIndex.PipelineFactory
import
element_factory
from
ILexicon
import
ILexicon
as
z2ILexicon
from
interfaces
import
ILexicon
class
Lexicon
(
Persistent
):
class
Lexicon
(
Persistent
):
__implements__
=
ILexicon
__implements__
=
z2ILexicon
implements
(
ILexicon
)
def
__init__
(
self
,
*
pipeline
):
def
__init__
(
self
,
*
pipeline
):
self
.
_wids
=
OIBTree
()
# word -> wid
self
.
_wids
=
OIBTree
()
# word -> wid
...
...
lib/python/Products/ZCTextIndex/ZCTextIndex.py
View file @
a876b335
...
@@ -33,17 +33,18 @@ from Products.PluginIndexes.common.util import parseIndexRequest
...
@@ -33,17 +33,18 @@ from Products.PluginIndexes.common.util import parseIndexRequest
from
Products.PluginIndexes.common
import
safe_callable
from
Products.PluginIndexes.common
import
safe_callable
from
Products.PluginIndexes.interfaces
import
IPluggableIndex
from
Products.PluginIndexes.interfaces
import
IPluggableIndex
from
Products.ZCTextIndex.ILexicon
import
ILexicon
from
Products.ZCTextIndex.Lexicon
import
\
from
Products.ZCTextIndex.Lexicon
import
\
Lexicon
,
Splitter
,
CaseNormalizer
,
StopWordRemover
Lexicon
,
Splitter
,
CaseNormalizer
,
StopWordRemover
from
Products.ZCTextIndex.NBest
import
NBest
from
Products.ZCTextIndex.NBest
import
NBest
from
Products.ZCTextIndex.QueryParser
import
QueryParser
from
Products.ZCTextIndex.QueryParser
import
QueryParser
from
PipelineFactory
import
element_factory
from
CosineIndex
import
CosineIndex
from
ILexicon
import
ILexicon
as
z2ILexicon
from
interfaces
import
ILexicon
from
interfaces
import
IZCLexicon
from
interfaces
import
IZCLexicon
from
interfaces
import
IZCTextIndex
from
interfaces
import
IZCTextIndex
from
OkapiIndex
import
OkapiIndex
from
PipelineFactory
import
element_factory
from
Products.ZCTextIndex.CosineIndex
import
CosineIndex
from
Products.ZCTextIndex.OkapiIndex
import
OkapiIndex
index_types
=
{
'Okapi BM25 Rank'
:
OkapiIndex
,
index_types
=
{
'Okapi BM25 Rank'
:
OkapiIndex
,
'Cosine Measure'
:
CosineIndex
}
'Cosine Measure'
:
CosineIndex
}
...
@@ -89,7 +90,8 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
...
@@ -89,7 +90,8 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
if
lexicon
is
None
:
if
lexicon
is
None
:
raise
LookupError
,
'Lexicon "%s" not found'
%
escape
(
lexicon_id
)
raise
LookupError
,
'Lexicon "%s" not found'
%
escape
(
lexicon_id
)
if
not
ILexicon
.
isImplementedBy
(
lexicon
):
if
not
(
ILexicon
.
providedBy
(
lexicon
)
or
z2ILexicon
.
isImplementedBy
(
lexicon
)):
raise
ValueError
(
'Object "%s" does not implement '
raise
ValueError
(
'Object "%s" does not implement '
'ZCTextIndex Lexicon interface'
'ZCTextIndex Lexicon interface'
%
lexicon
.
getId
())
%
lexicon
.
getId
())
...
@@ -134,7 +136,8 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
...
@@ -134,7 +136,8 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
return
self
.
_v_lexicon
return
self
.
_v_lexicon
except
AttributeError
:
except
AttributeError
:
lexicon
=
getattr
(
aq_parent
(
aq_inner
(
self
)),
self
.
lexicon_id
)
lexicon
=
getattr
(
aq_parent
(
aq_inner
(
self
)),
self
.
lexicon_id
)
if
not
ILexicon
.
isImplementedBy
(
lexicon
):
if
not
(
ILexicon
.
providedBy
(
lexicon
)
or
z2ILexicon
.
isImplementedBy
(
lexicon
)):
raise
TypeError
(
'Object "%s" is not a ZCTextIndex Lexicon'
raise
TypeError
(
'Object "%s" is not a ZCTextIndex Lexicon'
%
repr
(
lexicon
))
%
repr
(
lexicon
))
self
.
_v_lexicon
=
lexicon
self
.
_v_lexicon
=
lexicon
...
...
lib/python/Products/ZCTextIndex/interfaces.py
View file @
a876b335
...
@@ -24,6 +24,70 @@ class IZCTextIndex(Interface):
...
@@ -24,6 +24,70 @@ class IZCTextIndex(Interface):
"""
"""
class
ILexicon
(
Interface
):
"""Object responsible for converting text to word identifiers.
"""
def
termToWordIds
(
text
):
"""Return a sequence of ids of the words parsed from the text.
The input text may be either a string or a list of strings.
Parse the text as if it consists of search terms, and skip words
that aren't in the lexicon.
"""
def
sourceToWordIds
(
text
):
"""Return a sequence of ids of the words parsed from the text.
The input text may be either a string or a list of strings.
Parse the text as if it comes from a source document, and
create new word ids for words that aren't (yet) in the
lexicon.
"""
def
globToWordIds
(
pattern
):
"""Return a sequence of ids of words matching the pattern.
The argument should be a single word using globbing syntax,
e.g. 'foo*' meaning anything starting with 'foo'.
Return the wids for all words in the lexicon that match the
pattern.
"""
def
length
():
"""Return the number of unique terms in the lexicon.
"""
def
get_word
(
wid
):
"""Return the word for the given word id.
Raise KeyError if the word id is not in the lexicon.
"""
def
get_wid
(
word
):
"""Return the word id for the given word.
Return 0 if the word is not in the lexicon.
"""
def
parseTerms
(
text
):
"""Pass the text through the pipeline.
Return a list of words, normalized by the pipeline
(e.g. stopwords removed, case normalized etc.).
"""
def
isGlob
(
word
):
"""Return true if the word is a globbing pattern.
The word should be one of the words returned by parseTerms().
"""
class
IZCLexicon
(
Interface
):
class
IZCLexicon
(
Interface
):
"""Lexicon for ZCTextIndex.
"""Lexicon for ZCTextIndex.
...
...
lib/python/Products/ZCTextIndex/tests/testLexicon.py
View file @
a876b335
...
@@ -8,12 +8,17 @@
...
@@ -8,12 +8,17 @@
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
# FOR A PARTICULAR PURPOSE
.
#
#
##############################################################################
##############################################################################
"""Lexicon unit tests.
$Id$
"""
import
unittest
import
os
,
sys
import
os
,
sys
from
unittest
import
TestCase
,
TestSuite
,
main
,
makeSuite
import
ZODB
import
ZODB
import
transaction
import
transaction
...
@@ -64,7 +69,20 @@ class StopWordPipelineElement:
...
@@ -64,7 +69,20 @@ class StopWordPipelineElement:
return
res
return
res
class
Test
(
TestCase
):
class
Test
(
unittest
.
TestCase
):
def
test_z2interfaces
(
self
):
from
Interface.Verify
import
verifyClass
from
Products.ZCTextIndex.ILexicon
import
ILexicon
verifyClass
(
ILexicon
,
Lexicon
)
def
test_z3interfaces
(
self
):
from
Products.ZCTextIndex.interfaces
import
ILexicon
from
zope.interface.verify
import
verifyClass
verifyClass
(
ILexicon
,
Lexicon
)
def
testSourceToWordIds
(
self
):
def
testSourceToWordIds
(
self
):
lexicon
=
Lexicon
(
Splitter
())
lexicon
=
Lexicon
(
Splitter
())
wids
=
lexicon
.
sourceToWordIds
(
'cats and dogs'
)
wids
=
lexicon
.
sourceToWordIds
(
'cats and dogs'
)
...
@@ -145,7 +163,7 @@ class Test(TestCase):
...
@@ -145,7 +163,7 @@ class Test(TestCase):
lexicon
.
sourceToWordIds
(
'how now brown cow'
)
lexicon
.
sourceToWordIds
(
'how now brown cow'
)
self
.
assert_
(
lexicon
.
length
.
__class__
is
Length
)
self
.
assert_
(
lexicon
.
length
.
__class__
is
Length
)
class
TestLexiconConflict
(
TestCase
):
class
TestLexiconConflict
(
unittest
.
TestCase
):
db
=
None
db
=
None
...
@@ -186,11 +204,12 @@ class TestLexiconConflict(TestCase):
...
@@ -186,11 +204,12 @@ class TestLexiconConflict(TestCase):
self
.
assertEqual
(
copy
.
length
(),
11
)
self
.
assertEqual
(
copy
.
length
(),
11
)
self
.
assertEqual
(
copy
.
length
(),
len
(
copy
.
_words
))
self
.
assertEqual
(
copy
.
length
(),
len
(
copy
.
_words
))
def
test_suite
():
def
test_suite
():
suite
=
TestSuite
()
suite
=
unittest
.
TestSuite
()
suite
.
addTest
(
makeSuite
(
Test
))
suite
.
addTest
(
unittest
.
makeSuite
(
Test
))
suite
.
addTest
(
makeSuite
(
TestLexiconConflict
))
suite
.
addTest
(
unittest
.
makeSuite
(
TestLexiconConflict
))
return
suite
return
suite
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
main
(
defaultTest
=
'test_suite'
)
unittest
.
main
(
defaultTest
=
'test_suite'
)
lib/python/Products/ZCTextIndex/tests/testZCTextIndex.py
View file @
a876b335
...
@@ -17,9 +17,6 @@ $Id$
...
@@ -17,9 +17,6 @@ $Id$
"""
"""
import
unittest
import
unittest
import
Testing
import
Zope2
Zope2
.
startup
()
import
re
import
re
...
@@ -577,9 +574,11 @@ class OkapiQueryTests(QueryTestsBase):
...
@@ -577,9 +574,11 @@ class OkapiQueryTests(QueryTestsBase):
class PLexiconTests(unittest.TestCase):
class PLexiconTests(unittest.TestCase):
def test_z3interfaces(self):
def test_z3interfaces(self):
from Products.ZCTextIndex.interfaces import ILexicon
from Products.ZCTextIndex.interfaces import IZCLexicon
from Products.ZCTextIndex.interfaces import IZCLexicon
from zope.interface.verify import verifyClass
from zope.interface.verify import verifyClass
verifyClass(ILexicon, PLexicon)
verifyClass(IZCLexicon, PLexicon)
verifyClass(IZCLexicon, PLexicon)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment