Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Z
Zope
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
Zope
Commits
b0a8f678
Commit
b0a8f678
authored
Jan 29, 1997
by
chris
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added list_class argument to Index __init__
parent
2813712a
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
157 additions
and
99 deletions
+157
-99
lib/python/SearchIndex/InvertedIndex.py
lib/python/SearchIndex/InvertedIndex.py
+157
-99
No files found.
lib/python/SearchIndex/InvertedIndex.py
View file @
b0a8f678
...
@@ -32,7 +32,7 @@ Example usage:
...
@@ -32,7 +32,7 @@ Example usage:
InvertedIndex provides three types of indexes: one non-persistent
InvertedIndex provides three types of indexes: one non-persistent
index, Index, and two persistent indexes, Persistent and Transactional.
index, Index, and two persistent indexes, Persistent and Transactional.
$Id: InvertedIndex.py,v 1.9 1996/12/23 21:54:10 chris Exp $'''
$Id: InvertedIndex.py,v 1.10 1997/01/29 16:48:40 chris Exp $'''
# Copyright
# Copyright
#
#
# Copyright 1996 Digital Creations, L.C., 910 Princess Anne
# Copyright 1996 Digital Creations, L.C., 910 Princess Anne
...
@@ -66,7 +66,7 @@ $Id: InvertedIndex.py,v 1.9 1996/12/23 21:54:10 chris Exp $'''
...
@@ -66,7 +66,7 @@ $Id: InvertedIndex.py,v 1.9 1996/12/23 21:54:10 chris Exp $'''
#
#
# Limitation Of Liability
# Limitation Of Liability
#
#
# In no event will DCLC be liable for direct, indirect, special,
3
# In no event will DCLC be liable for direct, indirect, special,
# incidental, economic, cover, or consequential damages arising
# incidental, economic, cover, or consequential damages arising
# out of the use of or inability to use this software even if
# out of the use of or inability to use this software even if
# advised of the possibility of such damages. Some states do not
# advised of the possibility of such damages. Some states do not
...
@@ -84,6 +84,9 @@ $Id: InvertedIndex.py,v 1.9 1996/12/23 21:54:10 chris Exp $'''
...
@@ -84,6 +84,9 @@ $Id: InvertedIndex.py,v 1.9 1996/12/23 21:54:10 chris Exp $'''
# (540) 371-6909
# (540) 371-6909
#
#
# $Log: InvertedIndex.py,v $
# $Log: InvertedIndex.py,v $
# Revision 1.10 1997/01/29 16:48:40 chris
# added list_class argument to Index __init__
#
# Revision 1.9 1996/12/23 21:54:10 chris
# Revision 1.9 1996/12/23 21:54:10 chris
# Checked out by Chris for testing/editing.
# Checked out by Chris for testing/editing.
#
#
...
@@ -116,7 +119,7 @@ $Id: InvertedIndex.py,v 1.9 1996/12/23 21:54:10 chris Exp $'''
...
@@ -116,7 +119,7 @@ $Id: InvertedIndex.py,v 1.9 1996/12/23 21:54:10 chris Exp $'''
#
#
#
#
#
#
__version__
=
'$Revision: 1.
9
$'
[
11
:
-
2
]
__version__
=
'$Revision: 1.
10
$'
[
11
:
-
2
]
import
regex
,
regsub
,
string
,
marshal
import
regex
,
regsub
,
string
,
marshal
...
@@ -126,44 +129,77 @@ from types import *
...
@@ -126,44 +129,77 @@ from types import *
class
ResultList
:
class
ResultList
:
'''
\
'''
\
This object holds the list of frequency/key pairs for a word
This object holds the information for a word in an inverted index. It
in an inverted index.
provides mapping behavior, mapping document keys to corresponding
document information, including the frequency value.
Union of two ResultList objects may be performed with the | operator.
Union of two ResultList objects may be performed with the | operator.
Intersection of two ResultList objects may be performed with the & operator.
Intersection of two ResultList objects may be performed with the & operator.
A "not" operation may be performed on a ResultList using its Not() method.
Other methods:
ResultList frequency/key pairs may be sorted highest frequency to lowest
Not()
using the sort() method.
near()
keys()
items()
sorted_items()
'''
'''
def
__init__
(
self
,
freq_key_pairs
=
None
):
def
__init__
(
self
,
d
=
{}):
if
(
freq_key_pairs
is
None
):
self
.
_dict
=
d
self
.
_list
=
[]
else
:
self
.
_list
=
freq_key_pairs
def
addentry
(
self
,
freq
,
key
):
def
addentry
(
self
,
document_key
,
*
info
):
self
.
_list
.
append
((
freq
,
key
))
'''
\
addentry(document_key, *info)
add a document and related information to this ResultList'''
self
.
_dict
[
document_key
]
=
info
def
__str__
(
self
):
def
__str__
(
self
):
return
`self._
lis
t`
return
`self._
dic
t`
def
__len__
(
self
):
def
__len__
(
self
):
return
len
(
self
.
_list
)
return
len
(
self
.
_dict
)
def
__getitem__
(
self
,
key
):
return
self
.
_dict
[
key
]
def
__delitem__
(
self
,
key
):
del
self
.
_dict
[
key
]
def
keys
(
self
):
'''
\
keys()
get the documents in this ResultList'''
return
self
.
_dict
.
keys
()
def
has_key
(
self
,
key
):
return
self
.
_dict
.
has_key
(
key
)
def
items
(
self
):
'''items()
get a list of document key/document information pairs'''
return
self
.
_dict
.
items
()
def
__getitem__
(
self
,
i
):
def
sorted_items
(
self
):
return
self
.
_list
[
i
]
'''sorted_items()
get a
Sort the frequency/key pairs in the ResultList by highest to lowest
frequency'''
def
__getslice__
(
self
,
i
,
j
):
items
=
self
.
_dict
.
items
()
return
self
.
_list
[
i
:
j
]
items
.
sort
(
lambda
x
,
y
:
-
cmp
(
x
[
1
][
0
],
y
[
1
][
0
]))
return
items
def
__and__
(
self
,
x
):
def
__and__
(
self
,
x
):
...
@@ -172,15 +208,12 @@ class ResultList:
...
@@ -172,15 +208,12 @@ class ResultList:
by calculating the geometric mean of each pair of corresponding
by calculating the geometric mean of each pair of corresponding
frequencies.'''
frequencies.'''
result
=
[]
result
=
{}
d
=
{}
for
entry
in
self
.
_list
:
d
[
entry
[
1
]]
=
entry
[
0
]
for
entry
in
x
.
_list
:
for
key
in
x
.
keys
()
:
try
:
try
:
result
.
append
((
pow
(
d
[
entry
[
1
]]
*
entry
[
0
],
0.5
),
entry
[
1
])
)
result
[
key
]
=
(
pow
(
self
[
key
][
0
]
*
x
[
key
][
0
],
0.5
),
None
)
except
:
except
KeyError
:
pass
pass
return
ResultList
(
result
)
return
ResultList
(
result
)
...
@@ -192,19 +225,16 @@ class ResultList:
...
@@ -192,19 +225,16 @@ class ResultList:
combined by calculating the sum of each pair of corresponding
combined by calculating the sum of each pair of corresponding
frequencies.'''
frequencies.'''
result
=
[]
result
=
{}
d
=
{}
for
entry
in
self
.
_list
:
d
[
entry
[
1
]]
=
entry
[
0
]
for
entry
in
x
.
_list
:
for
key
in
self
.
keys
():
try
:
result
[
key
]
=
(
self
[
key
][
0
],
None
)
d
[
entry
[
1
]]
=
d
[
entry
[
1
]]
+
entry
[
0
]
except
:
d
[
entry
[
1
]]
=
entry
[
0
]
for
key
in
d
.
keys
():
for
key
in
x
.
keys
():
result
.
append
((
d
[
key
],
key
))
try
:
result
[
key
]
=
(
result
[
key
][
0
]
+
x
[
key
][
0
],
None
)
except
KeyError
:
result
[
key
]
=
(
x
[
key
][
0
],
None
)
return
ResultList
(
result
)
return
ResultList
(
result
)
...
@@ -220,64 +250,70 @@ class ResultList:
...
@@ -220,64 +250,70 @@ class ResultList:
ResultList instance.'''
ResultList instance.'''
index
=
index
.
_index_object
index
=
index
.
_index_object
res
=
None
exclude
=
{}
for
item
in
self
.
_list
:
exclude
[
item
[
1
]]
=
1
for
key
in
index
.
keys
():
for
key
in
index
.
keys
():
for
item
in
index
[
key
].
_list
:
try
:
if
(
not
exclude
.
has_key
(
item
[
1
])):
keys
=
index
[
key
].
keys
()
try
:
except
KeyError
:
res
=
res
|
ResultList
([
item
])
continue
except
:
res
=
ResultList
([
item
])
index_val
=
index
[
key
]
for
key
in
keys
:
if
(
not
self
.
has_key
(
key
)):
if
(
res
):
res
=
res
|
{
key
:
index_val
[
key
]
}
else
:
res
=
ResultList
({
key
:
index_val
[
key
]
})
try
:
if
(
res
)
:
return
res
return
res
except
:
return
ResultList
()
return
ResultList
()
def
__sub__
(
self
,
x
):
pass
def
near
(
self
,
x
,
distance
=
1
):
result
=
{}
def
__add__
(
self
,
x
):
for
key
in
self
.
keys
():
return
ResultList
(
self
.
_list
+
x
[:])
try
:
value
=
x
[
key
]
except
KeyError
:
continue
positions1
=
self
[
key
][
1
]
positions2
=
value
[
1
]
def
sort
(
self
):
for
position1
in
positions1
:
'''
\
for
position2
in
positions2
:
sort()
Sort the frequency/key pairs in the ResultList by highest to lowest
if
(
position1
is
None
or
position2
is
None
):
frequency'''
break
self
.
_list
.
sort
()
prox
=
position2
-
position1
self
.
_list
.
reverse
()
if
((
prox
>
0
)
and
(
prox
<=
distance
)):
rel
=
pow
(
self
[
key
][
0
]
*
value
[
0
],
0.5
)
try
:
pos
=
result
[
key
][
1
]
+
[
position2
]
except
KeyError
:
pos
=
[
position2
]
def
__getstate__
(
self
):
result
[
key
]
=
(
rel
,
pos
)
l
=
self
.
_list
else
:
new_l
=
[]
continue
for
key
,
freq
in
l
:
new_l
=
new_l
+
[
key
,
freq
]
break
return
marshal
.
dumps
(
new_l
)
return
ResultList
(
result
)
def
__setstate__
(
self
,
marshaled_state
):
l
=
marshal
.
loads
(
marshaled_state
)
if
(
len
(
l
)
and
l
[
0
]
is
not
TupleType
):
def
__getstate__
(
self
):
new_l
=
[]
return
self
.
_dict
for
i
in
range
(
0
,
len
(
l
),
2
):
new_l
.
append
(
tuple
(
l
[
i
:
(
i
+
2
)]))
l
=
new_l
self
.
_list
=
l
def
__setstate__
(
self
,
state
):
self
.
_dict
=
state
RegexType
=
type
(
regex
.
compile
(
''
))
RegexType
=
type
(
regex
.
compile
(
''
))
...
@@ -326,12 +362,15 @@ class Index:
...
@@ -326,12 +362,15 @@ class Index:
# perform a test search
# perform a test search
print i['blah']
print i['blah']
'''
#'
'''
list_class
=
ResultList
list_class
=
ResultList
def
__init__
(
self
,
index_dictionary
=
None
):
def
__init__
(
self
,
index_dictionary
=
None
,
list_class
=
None
):
'Create an inverted index'
'Create an inverted index'
if
(
list_class
is
not
None
):
self
.
list_class
=
list_class
self
.
set_index
(
index_dictionary
)
self
.
set_index
(
index_dictionary
)
...
@@ -380,10 +419,12 @@ class Index:
...
@@ -380,10 +419,12 @@ class Index:
nwords
=
math
.
log
(
len
(
src
))
nwords
=
math
.
log
(
len
(
src
))
i
=
{}
d
=
{}
for
s
in
src
:
for
i
in
range
(
len
(
src
)):
s
=
src
[
i
]
s
=
string
.
lower
(
s
)
s
=
string
.
lower
(
s
)
stopword_flag
=
0
stopword_flag
=
0
while
(
not
stopword_flag
):
while
(
not
stopword_flag
):
try
:
try
:
index_val
=
index
[
s
]
index_val
=
index
[
s
]
...
@@ -400,22 +441,21 @@ class Index:
...
@@ -400,22 +441,21 @@ class Index:
continue
continue
try
:
try
:
i
[
s
]
=
i
[
s
]
+
1
d
[
s
].
append
(
i
)
except
:
except
KeyError
:
i
[
s
]
=
1
d
[
s
]
=
[
i
]
for
s
in
i
.
keys
():
for
s
in
d
.
keys
():
freq
=
int
(
10000
*
(
i
[
s
]
/
nwords
))
freq
=
int
(
10000
*
(
len
(
d
[
s
])
/
nwords
))
try
:
try
:
index
[
s
].
addentry
(
freq
,
srckey
)
index
[
s
].
addentry
(
srckey
,
freq
,
d
[
s
]
)
except
:
except
:
index
[
s
]
=
List
(
[(
freq
,
srckey
)]
)
index
[
s
]
=
List
(
{
srckey
:
(
freq
,
d
[
s
])}
)
def
__getitem__
(
self
,
key
):
def
__getitem__
(
self
,
key
):
'''
'''
Get the ResultList objects for the inverted key, key, sorted by
Get the ResultList objects for the inverted key, key.
frequency.
The key may be a regular expression, in which case a regular
The key may be a regular expression, in which case a regular
expression match is done.
expression match is done.
...
@@ -451,11 +491,6 @@ class Index:
...
@@ -451,11 +491,6 @@ class Index:
key
=
string
.
lower
(
key
)
key
=
string
.
lower
(
key
)
try
:
key
=
index
[
key
]
except
KeyError
:
return
List
()
while
(
type
(
key
)
==
StringType
):
while
(
type
(
key
)
==
StringType
):
try
:
try
:
key
=
index
[
key
]
key
=
index
[
key
]
...
@@ -475,6 +510,15 @@ class Index:
...
@@ -475,6 +510,15 @@ class Index:
def
__len__
(
self
):
def
__len__
(
self
):
return
len
(
self
.
_index_object
)
return
len
(
self
.
_index_object
)
def
remove_document
(
self
,
doc_key
,
s
=
None
):
if
(
s
is
None
):
for
key
in
self
.
keys
():
try
:
del
self
[
key
][
doc_key
]
except
:
continue
def
get_stopwords
(
self
):
def
get_stopwords
(
self
):
index
=
self
.
_index_object
index
=
self
.
_index_object
...
@@ -498,12 +542,26 @@ class Index:
...
@@ -498,12 +542,26 @@ class Index:
return
synonyms
return
synonyms
def
get_document_keys
(
self
):
d
=
{}
for
key
in
self
.
keys
():
try
:
doc_keys
=
self
[
key
].
keys
()
except
:
continue
for
doc_key
in
doc_keys
:
d
[
doc_key
]
=
1
return
d
.
keys
()
class
PersistentResultList
(
ResultList
,
PickleDictionary
.
Persistent
):
class
PersistentResultList
(
ResultList
,
PickleDictionary
.
Persistent
):
def
addentry
(
self
,
freq
,
key
):
def
addentry
(
self
,
key
,
*
info
):
'''Add a frequency/key pair to this object'''
'''Add a frequency/key pair to this object'''
self
.
_list
.
append
((
freq
,
key
)
)
ResultList
.
addentry
(
self
,
key
,
info
)
self
.
__changed__
(
1
)
self
.
__changed__
(
1
)
...
@@ -512,7 +570,7 @@ class STPResultList(ResultList, SingleThreadedTransaction.Persistent):
...
@@ -512,7 +570,7 @@ class STPResultList(ResultList, SingleThreadedTransaction.Persistent):
def
addentry
(
self
,
freq
,
key
):
def
addentry
(
self
,
freq
,
key
):
'''Add a frequency/key pair to this object'''
'''Add a frequency/key pair to this object'''
self
.
_list
.
append
((
freq
,
key
)
)
ResultList
.
addentry
(
self
,
key
,
info
)
self
.
__changed__
(
1
)
self
.
__changed__
(
1
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment