Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Z
Zope
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
Zope
Commits
80cc084f
Commit
80cc084f
authored
May 23, 2002
by
Guido van Rossum
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Dump word frequencies as well.
parent
c892b4bd
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
35 additions
and
5 deletions
+35
-5
lib/python/Products/ZCTextIndex/tests/mhindex.py
lib/python/Products/ZCTextIndex/tests/mhindex.py
+35
-5
No files found.
lib/python/Products/ZCTextIndex/tests/mhindex.py
View file @
80cc084f
...
...
@@ -64,7 +64,7 @@ MAXLINES = 3
def
main
():
try
:
opts
,
args
=
getopt
.
getopt
(
sys
.
argv
[
1
:],
"bd:hm:n:Op:t:uwW"
)
opts
,
args
=
getopt
.
getopt
(
sys
.
argv
[
1
:],
"bd:
f
hm:n:Op:t:uwW"
)
except
getopt
.
error
,
msg
:
print
msg
print
"use -h for help"
...
...
@@ -77,12 +77,14 @@ def main():
datafs
=
os
.
path
.
expanduser
(
DATAFS
)
pack
=
0
trans
=
20000
dumpwords
=
dumpwids
=
0
dumpwords
=
dumpwids
=
dumpfreqs
=
0
for
o
,
a
in
opts
:
if
o
==
"-b"
:
bulk
=
1
if
o
==
"-d"
:
datafs
=
a
if
o
==
"-f"
:
dumpfreqs
=
1
if
o
==
"-h"
:
print
__doc__
return
...
...
@@ -103,11 +105,13 @@ def main():
if
o
==
"-W"
:
dumpwids
=
1
ix
=
Indexer
(
datafs
,
writable
=
update
or
bulk
,
trans
=
trans
,
pack
=
pack
)
if
dumpfreqs
:
ix
.
dumpfreqs
()
if
dumpwords
:
ix
.
dumpwords
()
if
dumpwids
:
ix
.
dumpwids
()
if
dumpwords
or
dumpwids
:
if
dumpwords
or
dumpwids
or
dumpfreqs
:
return
if
bulk
:
if
optimize
:
...
...
@@ -172,15 +176,41 @@ class Indexer:
print
len
(
self
.
path2docid
),
"Pathnames"
print
self
.
index
.
lexicon
.
length
(),
"Words"
def
dumpfreqs
(
self
):
lexicon
=
self
.
index
.
lexicon
index
=
self
.
index
.
index
assert
isinstance
(
index
,
OkapiIndex
)
L
=
[]
for
wid
in
lexicon
.
wids
():
freq
=
0
for
f
in
index
.
_wordinfo
.
get
(
wid
,
{}).
values
():
freq
+=
f
L
.
append
((
freq
,
wid
,
lexicon
.
get_word
(
wid
)))
L
.
sort
()
L
.
reverse
()
for
freq
,
wid
,
word
in
L
:
print
"%10d %10d %s"
%
(
wid
,
freq
,
word
)
def
dumpwids
(
self
):
lexicon
=
self
.
index
.
lexicon
index
=
self
.
index
.
index
assert
isinstance
(
index
,
OkapiIndex
)
for
wid
in
lexicon
.
wids
():
print
"%10d %s"
%
(
wid
,
lexicon
.
get_word
(
wid
))
freq
=
0
for
f
in
index
.
_wordinfo
.
get
(
wid
,
{}).
values
():
freq
+=
f
print
"%10d %10d %s"
%
(
wid
,
freq
,
lexicon
.
get_word
(
wid
))
def
dumpwords
(
self
):
lexicon
=
self
.
index
.
lexicon
index
=
self
.
index
.
index
assert
isinstance
(
index
,
OkapiIndex
)
for
word
in
lexicon
.
words
():
print
"%10d %s"
%
(
lexicon
.
get_wid
(
word
),
word
)
wid
=
lexicon
.
get_wid
(
word
)
freq
=
0
for
f
in
index
.
_wordinfo
.
get
(
wid
,
{}).
values
():
freq
+=
f
print
"%10d %10d %s"
%
(
wid
,
freq
,
word
)
def
close
(
self
):
self
.
root
=
None
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment