Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Z
Zope
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
Zope
Commits
f4c2c29b
Commit
f4c2c29b
authored
May 15, 2002
by
Tim Peters
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Use the new SetOps for mass union/intersection.
parent
08fe38f4
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
14 additions
and
56 deletions
+14
-56
lib/python/Products/ZCTextIndex/Index.py
lib/python/Products/ZCTextIndex/Index.py
+7
-22
lib/python/Products/ZCTextIndex/OkapiIndex.py
lib/python/Products/ZCTextIndex/OkapiIndex.py
+7
-34
No files found.
lib/python/Products/ZCTextIndex/Index.py
View file @
f4c2c29b
...
...
@@ -17,11 +17,12 @@
import
math
from
BTrees.IOBTree
import
IOBTree
from
BTrees.IIBTree
import
IIBTree
,
IIBucket
,
IISet
from
BTrees.IIBTree
import
weightedIntersection
,
weightedUnion
from
BTrees.IIBTree
import
IIBTree
,
IIBucket
from
Products.ZCTextIndex.IIndex
import
IIndex
from
Products.ZCTextIndex
import
WidCode
from
Products.ZCTextIndex.SetOps
import
mass_weightedIntersection
,
\
mass_weightedUnion
import
ZODB
from
Persistence
import
Persistent
...
...
@@ -62,7 +63,7 @@ class Index(Persistent):
def
length
(
self
):
"""Return the number of documents in the index."""
return
len
(
self
.
_docwords
)
def
get_words
(
self
,
docid
):
"""Returns the wordids for a given docid"""
return
WidCode
.
decode
(
self
.
_docwords
[
docid
])
...
...
@@ -114,15 +115,15 @@ class Index(Persistent):
def
search
(
self
,
term
):
wids
=
self
.
_lexicon
.
termToWordIds
(
term
)
return
self
.
_u
nion
(
self
.
_search_wids
(
wids
))
return
mass_weightedU
nion
(
self
.
_search_wids
(
wids
))
def
search_glob
(
self
,
pattern
):
wids
=
self
.
_lexicon
.
globToWordIds
(
pattern
)
return
self
.
_u
nion
(
self
.
_search_wids
(
wids
))
return
mass_weightedU
nion
(
self
.
_search_wids
(
wids
))
def
search_phrase
(
self
,
phrase
):
wids
=
self
.
_lexicon
.
termToWordIds
(
phrase
)
hits
=
self
.
_i
ntersection
(
self
.
_search_wids
(
wids
))
hits
=
mass_weightedI
ntersection
(
self
.
_search_wids
(
wids
))
if
not
hits
:
return
hits
code
=
WidCode
.
encode
(
wids
)
...
...
@@ -149,22 +150,6 @@ class Index(Persistent):
L
.
sort
(
lambda
x
,
y
:
cmp
(
len
(
x
[
0
]),
len
(
y
[
0
])))
return
L
def
_intersection
(
self
,
L
):
if
not
L
:
return
IIBTree
()
d2w
,
weight
=
L
[
0
]
dummy
,
result
=
weightedUnion
(
IIBTree
(),
d2w
,
1
,
weight
)
for
d2w
,
weight
in
L
[
1
:]:
dummy
,
result
=
weightedIntersection
(
result
,
d2w
,
1
,
weight
)
return
result
def
_union
(
self
,
L
):
# XXX This can be optimized, see OkapiIndex
result
=
IIBTree
()
for
d2w
,
weight
in
L
:
dummy
,
result
=
weightedUnion
(
result
,
d2w
,
1
,
weight
)
return
result
def
query_weight
(
self
,
terms
):
wids
=
[]
for
term
in
terms
:
...
...
lib/python/Products/ZCTextIndex/OkapiIndex.py
View file @
f4c2c29b
...
...
@@ -20,12 +20,13 @@
import
math
from
BTrees.IOBTree
import
IOBTree
from
BTrees.IIBTree
import
IIBTree
,
IIBucket
,
IISet
from
BTrees.IIBTree
import
weightedIntersection
,
weightedUnion
from
BTrees.IIBTree
import
IIBTree
,
IIBucket
from
Products.ZCTextIndex.IIndex
import
IIndex
from
Products.ZCTextIndex
import
WidCode
from
Products.ZCTextIndex.NBest
import
NBest
from
Products.ZCTextIndex
import
WidCode
from
Products.ZCTextIndex.SetOps
import
mass_weightedIntersection
,
\
mass_weightedUnion
# Instead of storing floats, we generally store scaled ints. Binary pickles
# can store those more efficiently. The default SCALE_FACTOR of 1024
...
...
@@ -98,15 +99,15 @@ class Index:
def
search
(
self
,
term
):
wids
=
self
.
_lexicon
.
termToWordIds
(
term
)
return
self
.
_u
nion
(
self
.
_search_wids
(
wids
))
return
mass_weightedU
nion
(
self
.
_search_wids
(
wids
))
def
search_glob
(
self
,
pattern
):
wids
=
self
.
_lexicon
.
globToWordIds
(
pattern
)
return
self
.
_u
nion
(
self
.
_search_wids
(
wids
))
return
mass_weightedU
nion
(
self
.
_search_wids
(
wids
))
def
search_phrase
(
self
,
phrase
):
wids
=
self
.
_lexicon
.
termToWordIds
(
phrase
)
hits
=
self
.
_i
ntersection
(
self
.
_search_wids
(
wids
))
hits
=
mass_weightedI
ntersection
(
self
.
_search_wids
(
wids
))
if
not
hits
:
return
hits
code
=
WidCode
.
encode
(
wids
)
...
...
@@ -156,34 +157,6 @@ class Index:
# of tf would still done at Python speed, and it's a lot more
# work than just multiplying by idf.
def
_intersection
(
self
,
L
):
if
not
L
:
return
IIBTree
()
# Intersect with smallest first.
L
=
L
[:]
# don't mutate the caller's L
L
.
sort
(
lambda
x
,
y
:
cmp
(
len
(
x
[
0
]),
len
(
y
[
0
])))
d2w
,
weight
=
L
[
0
]
dummy
,
result
=
weightedUnion
(
IIBTree
(),
d2w
,
1
,
weight
)
for
d2w
,
weight
in
L
[
1
:]:
dummy
,
result
=
weightedIntersection
(
result
,
d2w
,
1
,
weight
)
return
result
def
_union
(
self
,
L
):
if
not
L
:
return
IIBTree
()
# Balance unions as closely as possible, smallest to largest.
merge
=
NBest
(
len
(
L
))
for
x
,
weight
in
L
:
merge
.
add
((
x
,
weight
),
len
(
x
))
while
len
(
merge
)
>
1
:
# Merge the two smallest so far, and add back to the queue.
(
x
,
wx
),
dummy
=
merge
.
pop_smallest
()
(
y
,
wy
),
dummy
=
merge
.
pop_smallest
()
dummy
,
z
=
weightedUnion
(
x
,
y
,
wx
,
wy
)
merge
.
add
((
z
,
1
),
len
(
z
))
(
result
,
weight
),
score
=
merge
.
pop_smallest
()
return
result
def
query_weight
(
self
,
terms
):
# XXX I have no idea what to put here
return
10
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment