Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Z
Zope
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
Zope
Commits
ea795e5a
Commit
ea795e5a
authored
May 16, 2002
by
Guido van Rossum
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fix queries of the form 'extension module C'.
parent
9319c8e0
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
27 additions
and
11 deletions
+27
-11
lib/python/Products/ZCTextIndex/CosineIndex.py
lib/python/Products/ZCTextIndex/CosineIndex.py
+8
-0
lib/python/Products/ZCTextIndex/Lexicon.py
lib/python/Products/ZCTextIndex/Lexicon.py
+1
-3
lib/python/Products/ZCTextIndex/OkapiIndex.py
lib/python/Products/ZCTextIndex/OkapiIndex.py
+6
-0
lib/python/Products/ZCTextIndex/SetOps.py
lib/python/Products/ZCTextIndex/SetOps.py
+1
-1
lib/python/Products/ZCTextIndex/ZCTextIndex.py
lib/python/Products/ZCTextIndex/ZCTextIndex.py
+2
-0
lib/python/Products/ZCTextIndex/tests/mhindex.py
lib/python/Products/ZCTextIndex/tests/mhindex.py
+7
-5
lib/python/Products/ZCTextIndex/tests/testLexicon.py
lib/python/Products/ZCTextIndex/tests/testLexicon.py
+2
-2
No files found.
lib/python/Products/ZCTextIndex/CosineIndex.py
View file @
ea795e5a
...
@@ -115,6 +115,10 @@ class CosineIndex(Persistent):
...
@@ -115,6 +115,10 @@ class CosineIndex(Persistent):
def
search
(
self
,
term
):
def
search
(
self
,
term
):
wids
=
self
.
_lexicon
.
termToWordIds
(
term
)
wids
=
self
.
_lexicon
.
termToWordIds
(
term
)
if
not
wids
:
return
None
# All docs match
if
0
in
wids
:
wids
=
filter
(
None
,
wids
)
return
mass_weightedUnion
(
self
.
_search_wids
(
wids
))
return
mass_weightedUnion
(
self
.
_search_wids
(
wids
))
def
search_glob
(
self
,
pattern
):
def
search_glob
(
self
,
pattern
):
...
@@ -123,6 +127,8 @@ class CosineIndex(Persistent):
...
@@ -123,6 +127,8 @@ class CosineIndex(Persistent):
def
search_phrase
(
self
,
phrase
):
def
search_phrase
(
self
,
phrase
):
wids
=
self
.
_lexicon
.
termToWordIds
(
phrase
)
wids
=
self
.
_lexicon
.
termToWordIds
(
phrase
)
if
0
in
wids
:
return
IIBTree
()
hits
=
mass_weightedIntersection
(
self
.
_search_wids
(
wids
))
hits
=
mass_weightedIntersection
(
self
.
_search_wids
(
wids
))
if
not
hits
:
if
not
hits
:
return
hits
return
hits
...
@@ -157,6 +163,8 @@ class CosineIndex(Persistent):
...
@@ -157,6 +163,8 @@ class CosineIndex(Persistent):
N
=
float
(
len
(
self
.
_docweight
))
N
=
float
(
len
(
self
.
_docweight
))
sum
=
0.0
sum
=
0.0
for
wid
in
wids
:
for
wid
in
wids
:
if
wid
==
0
:
continue
wt
=
math
.
log
(
1.0
+
N
/
len
(
self
.
_wordinfo
[
wid
]))
wt
=
math
.
log
(
1.0
+
N
/
len
(
self
.
_wordinfo
[
wid
]))
sum
+=
wt
**
2.0
sum
+=
wt
**
2.0
return
scaled_int
(
math
.
sqrt
(
sum
))
return
scaled_int
(
math
.
sqrt
(
sum
))
...
...
lib/python/Products/ZCTextIndex/Lexicon.py
View file @
ea795e5a
...
@@ -62,9 +62,7 @@ class Lexicon:
...
@@ -62,9 +62,7 @@ class Lexicon:
last
=
element
.
process
(
last
)
last
=
element
.
process
(
last
)
wids
=
[]
wids
=
[]
for
word
in
last
:
for
word
in
last
:
wid
=
self
.
_wids
.
get
(
word
)
wids
.
append
(
self
.
_wids
.
get
(
word
,
0
))
if
wid
is
not
None
:
wids
.
append
(
wid
)
return
wids
return
wids
def
get_word
(
self
,
wid
):
def
get_word
(
self
,
wid
):
...
...
lib/python/Products/ZCTextIndex/OkapiIndex.py
View file @
ea795e5a
...
@@ -109,6 +109,10 @@ class OkapiIndex(Persistent):
...
@@ -109,6 +109,10 @@ class OkapiIndex(Persistent):
def
search
(
self
,
term
):
def
search
(
self
,
term
):
wids
=
self
.
_lexicon
.
termToWordIds
(
term
)
wids
=
self
.
_lexicon
.
termToWordIds
(
term
)
if
not
wids
:
return
None
# All docs match
if
0
in
wids
:
wids
=
filter
(
None
,
wids
)
return
mass_weightedUnion
(
self
.
_search_wids
(
wids
))
return
mass_weightedUnion
(
self
.
_search_wids
(
wids
))
def
search_glob
(
self
,
pattern
):
def
search_glob
(
self
,
pattern
):
...
@@ -117,6 +121,8 @@ class OkapiIndex(Persistent):
...
@@ -117,6 +121,8 @@ class OkapiIndex(Persistent):
def
search_phrase
(
self
,
phrase
):
def
search_phrase
(
self
,
phrase
):
wids
=
self
.
_lexicon
.
termToWordIds
(
phrase
)
wids
=
self
.
_lexicon
.
termToWordIds
(
phrase
)
if
0
in
wids
:
return
IIBTree
()
hits
=
mass_weightedIntersection
(
self
.
_search_wids
(
wids
))
hits
=
mass_weightedIntersection
(
self
.
_search_wids
(
wids
))
if
not
hits
:
if
not
hits
:
return
hits
return
hits
...
...
lib/python/Products/ZCTextIndex/SetOps.py
View file @
ea795e5a
...
@@ -20,10 +20,10 @@ from Products.ZCTextIndex.NBest import NBest
...
@@ -20,10 +20,10 @@ from Products.ZCTextIndex.NBest import NBest
def
mass_weightedIntersection
(
L
):
def
mass_weightedIntersection
(
L
):
"A list of (mapping, weight) pairs -> their weightedIntersection IIBTree."
"A list of (mapping, weight) pairs -> their weightedIntersection IIBTree."
L
=
[(
map
,
weight
)
for
(
map
,
weight
)
in
L
if
map
is
not
None
]
if
not
L
:
if
not
L
:
return
IIBTree
()
return
IIBTree
()
# Intersect with smallest first.
# Intersect with smallest first.
L
=
L
[:]
# don't mutate the caller's L
L
.
sort
(
lambda
x
,
y
:
cmp
(
len
(
x
[
0
]),
len
(
y
[
0
])))
L
.
sort
(
lambda
x
,
y
:
cmp
(
len
(
x
[
0
]),
len
(
y
[
0
])))
x
,
w
=
L
[
0
]
x
,
w
=
L
[
0
]
dummy
,
result
=
weightedUnion
(
IIBTree
(),
x
,
1
,
w
)
dummy
,
result
=
weightedUnion
(
IIBTree
(),
x
,
1
,
w
)
...
...
lib/python/Products/ZCTextIndex/ZCTextIndex.py
View file @
ea795e5a
...
@@ -72,6 +72,8 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
...
@@ -72,6 +72,8 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
"""
"""
tree
=
QueryParser
().
parseQuery
(
query
)
tree
=
QueryParser
().
parseQuery
(
query
)
results
=
tree
.
executeQuery
(
self
.
index
)
results
=
tree
.
executeQuery
(
self
.
index
)
if
results
is
None
:
return
[],
0
chooser
=
NBest
(
nbest
)
chooser
=
NBest
(
nbest
)
chooser
.
addmany
(
results
.
items
())
chooser
.
addmany
(
results
.
items
())
return
chooser
.
getbest
(),
len
(
results
)
return
chooser
.
getbest
(),
len
(
results
)
...
...
lib/python/Products/ZCTextIndex/tests/mhindex.py
View file @
ea795e5a
...
@@ -143,7 +143,7 @@ class Indexer:
...
@@ -143,7 +143,7 @@ class Indexer:
if
not
text
:
if
not
text
:
continue
continue
try
:
try
:
n
,
results
=
self
.
timequery
(
text
,
top
+
nbest
)
results
,
n
=
self
.
timequery
(
text
,
top
+
nbest
)
except
:
except
:
reportexc
()
reportexc
()
text
=
""
text
=
""
...
@@ -163,7 +163,7 @@ class Indexer:
...
@@ -163,7 +163,7 @@ class Indexer:
top
+=
nbest
top
+=
nbest
def
query
(
self
,
text
,
nbest
=
NBEST
,
maxlines
=
MAXLINES
):
def
query
(
self
,
text
,
nbest
=
NBEST
,
maxlines
=
MAXLINES
):
n
,
results
=
self
.
timequery
(
text
,
nbest
)
results
,
n
=
self
.
timequery
(
text
,
nbest
)
if
not
n
:
if
not
n
:
print
"No hits for %r."
%
text
print
"No hits for %r."
%
text
return
return
...
@@ -173,11 +173,11 @@ class Indexer:
...
@@ -173,11 +173,11 @@ class Indexer:
def
timequery
(
self
,
text
,
nbest
):
def
timequery
(
self
,
text
,
nbest
):
t0
=
time
.
time
()
t0
=
time
.
time
()
c0
=
time
.
clock
()
c0
=
time
.
clock
()
n
,
results
=
self
.
index
.
query
(
text
,
nbest
)
results
,
n
=
self
.
index
.
query
(
text
,
nbest
)
t1
=
time
.
time
()
t1
=
time
.
time
()
c1
=
time
.
clock
()
c1
=
time
.
clock
()
print
"[Query time: %.3f real, %.3f user]"
%
(
t1
-
t0
,
c1
-
c0
)
print
"[Query time: %.3f real, %.3f user]"
%
(
t1
-
t0
,
c1
-
c0
)
return
n
,
results
return
results
,
n
def
formatresults
(
self
,
text
,
results
,
maxlines
=
MAXLINES
,
def
formatresults
(
self
,
text
,
results
,
maxlines
=
MAXLINES
,
lo
=
0
,
hi
=
sys
.
maxint
):
lo
=
0
,
hi
=
sys
.
maxint
):
...
@@ -397,9 +397,11 @@ class TextIndex(Persistent):
...
@@ -397,9 +397,11 @@ class TextIndex(Persistent):
parser
=
QueryParser
()
parser
=
QueryParser
()
tree
=
parser
.
parseQuery
(
query
)
tree
=
parser
.
parseQuery
(
query
)
results
=
tree
.
executeQuery
(
self
.
index
)
results
=
tree
.
executeQuery
(
self
.
index
)
if
results
is
None
:
return
[],
0
chooser
=
NBest
(
nbest
)
chooser
=
NBest
(
nbest
)
chooser
.
addmany
(
results
.
items
())
chooser
.
addmany
(
results
.
items
())
return
len
(
results
),
chooser
.
getbest
(
)
return
chooser
.
getbest
(),
len
(
results
)
def
query_weight
(
self
,
query
):
def
query_weight
(
self
,
query
):
parser
=
QueryParser
()
parser
=
QueryParser
()
...
...
lib/python/Products/ZCTextIndex/tests/testLexicon.py
View file @
ea795e5a
...
@@ -76,7 +76,7 @@ class Test(TestCase):
...
@@ -76,7 +76,7 @@ class Test(TestCase):
lexicon
=
Lexicon
(
Splitter
())
lexicon
=
Lexicon
(
Splitter
())
wids
=
lexicon
.
sourceToWordIds
(
'cats and dogs'
)
wids
=
lexicon
.
sourceToWordIds
(
'cats and dogs'
)
wids
=
lexicon
.
termToWordIds
(
'boxes'
)
wids
=
lexicon
.
termToWordIds
(
'boxes'
)
self
.
assertEqual
(
wids
,
[])
self
.
assertEqual
(
wids
,
[
0
])
def
testOnePipelineElement
(
self
):
def
testOnePipelineElement
(
self
):
lexicon
=
Lexicon
(
Splitter
(),
StupidPipelineElement
(
'dogs'
,
'fish'
))
lexicon
=
Lexicon
(
Splitter
(),
StupidPipelineElement
(
'dogs'
,
'fish'
))
...
@@ -94,7 +94,7 @@ class Test(TestCase):
...
@@ -94,7 +94,7 @@ class Test(TestCase):
lexicon
=
Lexicon
(
Splitter
())
lexicon
=
Lexicon
(
Splitter
())
wids
=
lexicon
.
sourceToWordIds
(
'CATS and dogs'
)
wids
=
lexicon
.
sourceToWordIds
(
'CATS and dogs'
)
wids
=
lexicon
.
termToWordIds
(
'cats and dogs'
)
wids
=
lexicon
.
termToWordIds
(
'cats and dogs'
)
self
.
assertEqual
(
wids
,
[
2
,
3
])
self
.
assertEqual
(
wids
,
[
0
,
2
,
3
])
def
testTwoElementPipeline
(
self
):
def
testTwoElementPipeline
(
self
):
lexicon
=
Lexicon
(
Splitter
(),
lexicon
=
Lexicon
(
Splitter
(),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment