Zope: commit 41eb6fe8
authored Jan 26, 2001 by Christopher Petrilli
    Merge of Lexicon cleanup and text index merging.
parent 78f3476f
Showing 5 changed files with 261 additions and 174 deletions (+261 -174)
lib/python/SearchIndex/GlobbingLexicon.py   +85  -56
lib/python/SearchIndex/Lexicon.py           +19  -14
lib/python/SearchIndex/UnIndex.py            +9   -7
lib/python/SearchIndex/UnKeywordIndex.py     +2   -2
lib/python/SearchIndex/UnTextIndex.py      +146  -95
lib/python/SearchIndex/GlobbingLexicon.py
@@ -81,82 +81,108 @@
 # many individuals on behalf of Digital Creations.  Specific
 # attributions are listed in the accompanying credits file.
 #
 ##############################################################################

 __doc__ = """Lexicon object that supports globbing"""

 from Lexicon import Lexicon
 from Splitter import Splitter
 from intSet import intSet
 from UnTextIndex import Or

-import re, time
+import re, string
 import OIBTree, BTree, IOBTree, IIBTree

+# Short cuts for common data containers
 OIBTree  = OIBTree.BTree                # Object -> Integer
+OOBTree  = BTree.BTree                  # Object -> Object
 IOBTree  = IOBTree.BTree                # Integer -> Object
 IIBucket = IIBTree.Bucket               # Integer -> Integer

+import pdb
+
 class GlobbingLexicon(Lexicon):
-    """
-    Base class to support globbing lexicon object.
-    """
+    """Lexicon which supports basic globbing function ('*' and '?').
+
+    This lexicon keeps several data structures around that are useful
+    for searching.  They are:
+
+      '_lexicon' -- Contains the mapping from word => word_id
+
+      '_inverseLex' -- Contains the mapping from word_id => word
+
+      '_digrams' -- Contains a mapping from digram => word_id
+
+    Before going further, it is necessary to understand what a digram is,
+    as it is a core component of the structure of this lexicon.  A digram
+    is a two-letter sequence in a word.  For example, the word 'zope'
+    would be converted into the digrams::
+
+      ['$z', 'zo', 'op', 'pe', 'e$']
+
+    where the '$' is a word marker.  It is used at the beginning and end
+    of the words.  Those digrams are significant.
+    """

     multi_wc = '*'
     single_wc = '?'
     eow = '$'

     def __init__(self):
-        self.counter = 0
+        self.counter = 0                # word id counter XXX
         self._lexicon = OIBTree()
         self._inverseLex = IOBTree()
         self._digrams = OOBTree()

-    def set(self, word):
-        """ """
+    def createDigrams(self, word):
+        """Returns a list with the set of digrams in the word."""
+        digrams = []
+        digrams.append(self.eow + word[0])      # Mark the beginning
+
+        for i in range(len(word)):
+            digrams.append(word[i:i+2])
+
+        digrams[-1] = digrams[-1] + self.eow    # Mark the end
+
+        return digrams
+
+    def getWordId(self, word):
+        """Provided 'word', return the matching integer word id."""
         if self._lexicon.has_key(word):
             return self._lexicon[word]
         else:
-            word = intern(word)
-            self._lexicon[word] = self.counter
-            self._inverseLex[self.counter] = word
+            return self.assignWordId(word)

-            ## now, split the word into digrams and insert references
-            ## to 'word' into the digram object.  The first and last
-            ## digrams in the list are specially marked with $ to
-            ## indicate the beginning and end of the word
+    set = getWordId                     # Kludge for old code

-            digrams = []
-            digrams.append(self.eow + word[0])  # mark the beginning
-
-            for i in range(len(word)):
-                digrams.append(word[i:i+2])
-
-            digrams[-1] = digrams[-1] + self.eow  # mark the end
+    def assignWordId(self, word):
+        """Assigns a new word id to the provided word, and return it."""
+        # Double check it's not in the lexicon already, and if it is, just
+        # return it.
+        if self._lexicon.has_key(word):
+            return self._lexicon[word]

-            _digrams = self._digrams
+        # First we go ahead and put the forward and reverse maps in.
+        self._lexicon[word] = self.counter
+        self._inverseLex[self.counter] = word

-            for digram in digrams:
-                set = _digrams.get(digram)
+        # Now take all the digrams and insert them into the digram map.
+        for digram in self.createDigrams(word):
+            set = self._digrams.get(digram)

-                if set is None:
-                    _digrams[digram] = set = intSet()
+            if set is None:
+                self._digrams[digram] = set = intSet()

-                set.insert(self.counter)
+            set.insert(self.counter)

-            counter = self.counter
-            self.counter = self.counter + 1
-            return counter
+        self.counter = self.counter + 1
+        return self.counter - 1         # Adjust for the previous increment

-    def get(self, pattern):
-        """ Query the lexicon for words matching a pattern.
-        """
+    def get(self, pattern):
+        """ Query the lexicon for words matching a pattern."""
         wc_set = [self.multi_wc, self.single_wc]

         digrams = []
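The digram scheme in the new docstring is easy to check by hand. A minimal sketch of `createDigrams` in modern Python, with a plain constant in place of the class attribute `eow` (everything else mirrors the diff):

    EOW = '$'  # word marker, as in GlobbingLexicon.eow

    def create_digrams(word):
        """Return the digram list for 'word', e.g. 'zope' ->
        ['$z', 'zo', 'op', 'pe', 'e$']."""
        digrams = [EOW + word[0]]           # mark the beginning
        for i in range(len(word)):
            digrams.append(word[i:i + 2])
        digrams[-1] = digrams[-1] + EOW     # mark the end
        return digrams

    assert create_digrams('zope') == ['$z', 'zo', 'op', 'pe', 'e$']

Note the final slice `word[i:i+2]` yields a single character for the last index, which is exactly what the end-of-word marker is appended to.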
@@ -199,7 +225,7 @@ class GlobbingLexicon(Lexicon):
         ## may contain all matching digrams, but in the wrong
         ## order.

-        expr = re.compile(self.translate(pattern))
+        expr = re.compile(self.createRegex(pattern))
         words = []
         hits = []
         for x in result.keys():
@@ -207,14 +233,14 @@ class GlobbingLexicon(Lexicon):
                 hits.append(x)
         return hits

     def __getitem__(self, word):
         """ """
         return self.get(word)

-    def query_hook(self, q):
-        """expand wildcards
-        """
+    def query_hook(self, q):
+        """expand wildcards"""
         words = []
         wids = []
         for w in q:
@@ -230,6 +256,7 @@ class GlobbingLexicon(Lexicon):
         return words

+
     def Splitter(self, astring, words=None):
         """ wrap the splitter """
@@ -239,21 +266,23 @@ class GlobbingLexicon(Lexicon):

         return Splitter(astring)

-    def translate(self, pat):
+    def createRegex(self, pat):
         """Translate a PATTERN to a regular expression.

         There is no way to quote meta-characters.
         """
-        i, n = 0, len(pat)
-        res = ''
-        while i < n:
-            c = pat[i]
-            i = i + 1
-            if c == self.multi_wc:
-                res = res + '.*'
-            elif c == self.single_wc:
-                res = res + '.?'
-            else:
-                res = res + re.escape(c)
-        return res + '$'
+        transTable = string.maketrans("", "")
+
+        # First, deal with multi-character globbing
+        result = string.replace(pat, '*', '.*')
+
+        # Next, we need to deal with single-character globbing
+        result = string.replace(result, '?', '.?')
+
+        # Now, we need to remove all of the characters that
+        # are forbidden.
+        result = string.translate(result, transTable,
+                                  r'()&|!@#$%^{}\<>')
+
+        return "%s$" % result
lib/python/SearchIndex/Lexicon.py
@@ -83,11 +83,6 @@
 #
 ##############################################################################

-import string, regex, ts_regex
-import regsub
-
 __doc__ = """ Module breaks out Zope specific methods and behavior.  In
 addition, provides the Lexicon class which defines a word to integer
 mapping.
@@ -137,13 +132,23 @@ class Lexicon(Persistent, Implicit):
         self.stop_syn = stop_syn

-    def set(self, word):
+    def getWordId(self, word):
         """ return the word id of 'word' """
         if self._lexicon.has_key(word):
             return self._lexicon[word]
         else:
+            return self.assignWordId(word)
+
+    set = getWordId
+
+    def assignWordId(self, word):
+        """Assigns a new word id to the provided word and returns it."""
+        # First make sure it's not already in there
+        if self._lexicon.has_key(word):
+            return self._lexicon[word]
+
         if not hasattr(self, 'counter'):
             self.counter = 0
         self._lexicon[intern(word)] = self.counter
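The split of `set` into `getWordId`/`assignWordId` (with `set = getWordId` kept as a compatibility alias) separates the read path from the write path. A condensed sketch of the pattern, with a plain dict standing in for the persistent `_lexicon` BTree and without the `intern` and lazy-`counter` details:

    class MiniLexicon:
        """Sketch of the getWordId/assignWordId split in Lexicon."""

        def __init__(self):
            self.counter = 0
            self._lexicon = {}          # word -> word id

        def getWordId(self, word):
            """Return the id for 'word', assigning one if needed."""
            if word in self._lexicon:
                return self._lexicon[word]
            return self.assignWordId(word)

        set = getWordId                 # kludge for old code, as in the diff

        def assignWordId(self, word):
            """Assign and return a fresh id for 'word'."""
            if word in self._lexicon:   # double-check, as the diff does
                return self._lexicon[word]
            self._lexicon[word] = self.counter
            self.counter = self.counter + 1
            return self.counter - 1

    lex = MiniLexicon()
    assert lex.getWordId('zope') == lex.set('zope') == 0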
@@ -152,8 +157,8 @@ class Lexicon(Persistent, Implicit):

     def get(self, key, default=None):
-        """
-        """
-        return [self._lexicon.get(key, default)]
+        """
+        Return the matched word against the key.
+        """
+        return [self._lexicon.getWordId(key, default)]

     def __getitem__(self, key):
lib/python/SearchIndex/UnIndex.py
@@ -85,7 +85,7 @@

 """Simple column indices"""

-__version__ = '$Revision: 1.23 $'[11:-2]
+__version__ = '$Revision: 1.24 $'[11:-2]
@@ -197,12 +197,12 @@ class UnIndex(Persistent, Implicit):
                     ('unindex_object could not remove '
                      'integer id %s from index %s.  This '
-                     'should not happen.' % (str(i), str(k))))
+                     'should not happen.' % (str(documentId), str(self.id))))
         else:
             LOG(self.__class__.__name__, ERROR,
                 ('unindex_object tried to retrieve set %s '
                  'from index %s but couldn\'t.  This '
-                 'should not happen.' % (repr(set), str(k))))
+                 'should not happen.' % (repr(entry), str(self.id))))

     def insertForwardIndexEntry(self, entry, documentId):
@@ -234,17 +234,19 @@ class UnIndex(Persistent, Implicit):
             datum = getattr(obj, self.id)
             if callable(datum):
                 datum = datum()
-        except:
+        except AttributeError:
             datum = MV

-        if not (datum == self._unindex.get(documentId, MV)):
+        # We don't want to do anything that we don't have to here, so we'll
+        # check to see if the new and existing information is the same.
+        oldDatum = self._unindex.get(documentId, MV)
+        if not datum == oldDatum:
+            if oldDatum is not MV:
+                self.removeForwardIndexEntry(oldDatum, documentId)
             self.insertForwardIndexEntry(datum, documentId)
             self._unindex[documentId] = datum
             returnStatus = 1

+        self._p_changed = 1             # Tickle the transaction
         return returnStatus
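The new logic only touches the forward index when the attribute value actually changed, and now removes the stale entry before inserting the new one. The control flow, reduced to a runnable sketch over plain dicts (MV here is a local stand-in for the catalog's missing-value marker; sets stand in for the index rows):

    MV = object()                       # stand-in for the missing-value marker

    def reindex(forward, unindex, doc_id, datum):
        """Sketch of UnIndex.index_object's change detection."""
        old = unindex.get(doc_id, MV)
        if datum == old:
            return 0                    # nothing to do
        if old is not MV:
            # drop the stale forward entry before inserting the new one
            forward.setdefault(old, set()).discard(doc_id)
        forward.setdefault(datum, set()).add(doc_id)
        unindex[doc_id] = datum
        return 1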
lib/python/SearchIndex/UnKeywordIndex.py
@@ -115,7 +115,7 @@ class UnKeywordIndex(UnIndex):
             newKeywords = getattr(obj, self.id)
             if callable(newKeywords):
                 newKeywords = newKeywords()
-        except:
+        except Except:
             newKeywords = MV

         if type(newKeywords) is StringType:
@@ -162,7 +162,7 @@ class UnKeywordIndex(UnIndex):
         except TypeError:
             return 0

-        self._unindex[documentId] = newKeywords
+        self._unindex[documentId] = newKeywords[:]      # Make a copy

         return 1
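The `[:]` copy matters because `newKeywords` is typically the object's own mutable list; storing it directly would let later mutations of the object silently rewrite what the index believes it stored. A two-line illustration:

    keywords = ['zope', 'catalog']
    stored = keywords[:]                    # shallow copy, per "Make a copy"
    keywords.append('later-mutation')
    assert stored == ['zope', 'catalog']    # the stored record is unaffected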
lib/python/SearchIndex/UnTextIndex.py
@@ -89,43 +89,58 @@ The UnTextIndex falls under the 'I didnt have a better name for it'
 excuse.  It is an 'Un' Text index because it stores a little bit of
 undo information so that objects can be unindexed when the old value
 is no longer known.
 """

-__version__ = '$Revision: 1.34 $'[11:-2]
+__version__ = '$Revision: 1.35 $'[11:-2]

+import BTree, IIBTree, IOBTree, OIBTree
 import string, regex, regsub, ts_regex
 import operator

+from intSet import intSet
 from Globals import Persistent
-import BTree, IIBTree, IOBTree, OIBTree
 from Acquisition import Implicit
-BTree = BTree.BTree
-IOBTree = IOBTree.BTree
-IIBucket = IIBTree.Bucket
-OIBTree = OIBTree.BTree
-from intSet import intSet
-import operator
 from Splitter import Splitter
-from string import strip
 from zLOG import LOG, ERROR
 from types import *
-from Lexicon import Lexicon, stop_word_dict
+from Lexicon import Lexicon
 from ResultList import ResultList
-from types import *

+BTree = BTree.BTree             # Regular generic BTree
+IOBTree = IOBTree.BTree         # Integer -> Object
+IIBucket = IIBTree.Bucket       # Integer -> Integer
+OIBTree = OIBTree.BTree         # Object -> Integer

 AndNot = 'andnot'
 And = 'and'
 Or = 'or'
 Near = '...'
-
 QueryError = 'TextIndex.QueryError'

 class UnTextIndex(Persistent, Implicit):
+    """Full-text index.
+
+    There is a ZCatalog UML model that sheds some light on what is
+    going on here.  '_index' is a BTree which maps word ids to mapping
+    from document id to score.  Something like:
+
+      {'bob' : {1 : 5, 2 : 3, 42 : 9}}
+      {'uncle' : {1 : 1}}
+
+    The '_unindex' attribute is a mapping from document id to word
+    ids.  This mapping allows the catalog to unindex an object:
+
+      {42 : ('bob', 'is', 'your', 'uncle')}
+
+    This isn't exactly how things are represented in memory, many
+    optimizations happen along the way."""

     meta_type = 'Text Index'

     def __init__(self, id=None, ignore_ex=None,
                  call_methods=None, lexicon=None):
         """Create an index
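The two structures in the relocated class docstring are the heart of the index. In plain Python terms (dicts standing in for the BTrees, and the docstring's words standing in for the integer word ids the real code uses):

    # Forward index: word id -> {document id: score}
    _index = {
        'bob':   {1: 5, 2: 3, 42: 9},
        'uncle': {1: 1},
    }

    # Reverse index: document id -> word ids, kept so the catalog can
    # unindex a document without re-reading the original object.
    _unindex = {
        42: ('bob', 'is', 'your', 'uncle'),
    }

    # Unindexing document 42 walks _unindex[42] and deletes 42 from
    # each forward-index row:
    for word in _unindex[42]:
        _index.get(word, {}).pop(42, None)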
@@ -142,49 +157,33 @@ class UnTextIndex(Persistent, Implicit):
         of getattr or getitem to get an attribute.

         'lexicon' is the lexicon object to specify, if None, the
-        index will use a private lexicon.
-
-        There is a ZCatalog UML model that sheds some light on what is
-        going on here.  '_index' is a BTree which maps word ids to
-        mapping from document id to score.  Something like:
-
-          {'bob' : {1 : 5, 2 : 3, 42 : 9}}
-          {'uncle' : {1 : 1}}
-
-        The '_unindex' attribute is a mapping from document id to word
-        ids.  This mapping allows the catalog to unindex an object:
-
-          {42 : ('bob', 'is', 'your', 'uncle')}
-
-        This isn't exactly how things are represented in memory, many
-        optimizations happen along the way.
-        """
+        index will use a private lexicon."""

-        if not id==ignore_ex==call_methods==None:
-            self.id=id
-            self.ignore_ex=ignore_ex
-            self.call_methods=call_methods
-            self._index=IOBTree()
-            self._unindex=IOBTree()
+        if not id == ignore_ex == call_methods == None:
+            self.id = id
+            self.ignore_ex = ignore_ex
+            self.call_methods = call_methods
+            self._index = IOBTree()
+            self._unindex = IOBTree()

-        else:
-            pass
-
         if lexicon is None:
             ## if no lexicon is provided, create a default one
-            self._lexicon=Lexicon()
+            self._lexicon = Lexicon()
         else:
             self._lexicon = lexicon

     def getLexicon(self, vocab_id):
-        """ bit of a hack, indexes have been made acquirers so that
-        they can acquire a vocabulary object from the object system in
+        """Return the Lexicon in use.
+
+        Bit of a hack, indexes have been made acquirers so that they
+        can acquire a vocabulary object from the object system in
         Zope.  I don't think indexes were ever intended to participate
-        in this way, but I don't see too much of a problem with it.
-        """
+        in this way, but I don't see too much of a problem with it."""

         if type(vocab_id) is not StringType:
             vocab = vocab_id
         else:
@@ -193,10 +192,14 @@ class UnTextIndex(Persistent, Implicit):

     def __len__(self):
+        """Return the number of objects indexed."""
         return len(self._unindex)

     def clear(self):
+        """Reinitialize the text index."""
         self._index = IOBTree()
         self._unindex = IOBTree()
@@ -214,6 +217,10 @@ class UnTextIndex(Persistent, Implicit):

     def getEntryForObject(self, rid, default=None):
+        """Get all information contained for a specific object.
+
+        This takes the objects record ID as its main argument."""
+
+        wordMap = self.getLexicon(self._lexicon)._lexicon.items()
         results = self._unindex.get(rid, None)
@@ -247,12 +254,21 @@ class UnTextIndex(Persistent, Implicit):
             # Tuples are only used for rows which have only
             # a single entry.  Since we now need more, we'll
             # promote it to a mapping object (dictionary).
-            indexRow = {indexRow[0]: indexRow[1]}
-            indexRow[documentId] = score
-            self._index[entry] = indexRow
+
+            # First, make sure we're not already in it, if so
+            # update the score if necessary.
+            if indexRow[0] == documentId:
+                if indexRow[1] != score:
+                    indexRow = (documentId, score)
+            else:
+                indexRow = {indexRow[0]: indexRow[1]}
+                indexRow[documentId] = score
+                self._index[entry] = indexRow
         elif type(indexRow) is DictType:
-            if len(indexRow) > 4:
+            if indexRow.has_key(documentId):
+                if indexRow[documentId] == score:
+                    return 1            # No need to update
+            elif len(indexRow) > 4:
                 # We have a mapping (dictionary), but it has
                 # grown too large, so we'll convert it to a
                 # bucket.
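Forward-index rows graduate through three representations as they grow: a tuple for a single posting, a dict for a handful, and an IIBucket beyond four entries. A runnable sketch of that promotion ladder (a dict stands in for the IIBucket stage, and names are mine):

    def insert_posting(index, word_id, doc_id, score):
        """Sketch of the tuple -> dict promotion in insertForwardIndexEntry."""
        row = index.get(word_id)
        if row is None:
            index[word_id] = (doc_id, score)    # first posting: a bare tuple
        elif type(row) is tuple:
            if row[0] == doc_id:                # same doc: maybe update score
                if row[1] != score:
                    index[word_id] = (doc_id, score)
            else:                               # second doc: promote to dict
                index[word_id] = {row[0]: row[1], doc_id: score}
        else:
            if row.get(doc_id) == score:
                return 1                        # no need to update
            # beyond ~4 entries the real code promotes dict -> IIBucket here
            row[doc_id] = score
        return 1

    index = {}
    insert_posting(index, 10, 1, 5)
    insert_posting(index, 10, 2, 3)
    assert index[10] == {1: 5, 2: 3}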
@@ -266,6 +282,9 @@ class UnTextIndex(Persistent, Implicit):
                 indexRow[documentId] = score
         else:
             # We've got a IIBucket already.
+            if indexRow.has_key(documentId):
+                if indexRow[documentId] == score:
+                    return 1
             indexRow[documentId] = score
         else:
             # We don't have any information at this point, so we'll
@@ -277,13 +296,43 @@ class UnTextIndex(Persistent, Implicit):

     def insertReverseIndexEntry(self, entry, documentId):
         """Insert the correct entry into the reverse indexes for future
         unindexing."""
-        newEntry = self._unindex.get(documentId, [])
-        newEntry.append(entry)
-        self._unindex[documentId] = newEntry
+        newRow = self._unindex.get(documentId, [])
+        if newRow:      # Catch cases where we don't need to modify anything
+            if entry in newRow:
+                return 1
+        newRow.append(entry)
+        self._unindex[documentId] = newRow

-    def index_object(self, documentId, obj, threshold=None):
+    def removeReverseEntry(self, entry, documentId):
+        """Removes a single entry from the reverse index."""
+        newRow = self._unindex.get(documentId, [])
+        if newRow:
+            try:
+                newRow.remove(entry)
+            except ValueError:
+                pass            # We don't have it, this is bad
+            self._unindex[documentId] = newRow
+
+    def removeForwardEntry(self, entry, documentId):
+        """Remove a single entry from the forward index."""
+        currentRow = self._index.get(entry, None)
+        if type(currentRow) is TupleType:
+            del self._index[entry]
+        elif currentRow is not None:
+            try:
+                del self._index[entry][documentId]
+            except (KeyError, IndexError, TypeError):
+                LOG('UnTextIndex', ERROR,
+                    'unindex_object tried to unindex nonexistent'
+                    ' document %s' % str(i))
+
+    def index_object(self, documentId, obj, threshold=None):
         """ Index an object:

         'documentId' is the integer id of the document
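These helpers keep the forward and reverse maps in step: inserts are now idempotent, and removals exist for both directions. A compact sketch of the reverse-side pair over a plain dict (function names follow the diff, signatures are simplified):

    def insert_reverse_entry(unindex, entry, doc_id):
        """Idempotent append, as in insertReverseIndexEntry."""
        row = unindex.get(doc_id, [])
        if entry in row:
            return 1            # already recorded, nothing to modify
        row.append(entry)
        unindex[doc_id] = row

    def remove_reverse_entry(unindex, entry, doc_id):
        """Tolerant removal, as in removeReverseEntry."""
        row = unindex.get(doc_id, [])
        if row:
            try:
                row.remove(entry)
            except ValueError:
                pass            # entry was never recorded
            unindex[doc_id] = row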
@@ -301,7 +350,7 @@ class UnTextIndex(Persistent, Implicit):
                 source = str(source())
             else:
                 source = str(source)
-        except:
+        except AttributeError:
             return 0
@@ -322,32 +371,36 @@ class UnTextIndex(Persistent, Implicit):
             else:
                 wordList[word] = 1

         index = self._index
         unindex = self._unindex
         lexicon = self.getLexicon(self._lexicon)
-        unindex[documentId] = []        # XXX
+        currentWordIds = self._unindex.get(documentId, [])
         wordCount = 0

+        # First deal with deleted words
+        # To do this, the first thing we have to do is convert the
+        # existing words to words, from wordIDS
+        wordListAsIds = OIBTree()
         for word, score in wordList.items():
             if threshold is not None:
                 if ((wordCount % threshold) == 0) and not (wordCount == 0):
                     # commit a subtransaction hack
                     get_transaction().commit(1)
                     # kick the cache
                     self._p_jar.cacheFullSweep(1)
-            wordId = lexicon.set(word)
+            wordListAsIds[lexicon.getWordId(word)] = score

+        for word in currentWordIds:
+            if not wordListAsIds.has_key(word):
+                self.removeForwardEntry(word, documentId)
+
+        #import pdb; pdb.set_trace()
+        # Now we can deal with new/updated entries
+        for wordId, score in wordListAsIds.items():
             self.insertForwardIndexEntry(wordId, documentId, score)
             self.insertReverseIndexEntry(wordId, documentId)
             wordCount = wordCount + 1

-        # return the number of words you indexed
+        # Return the number of words you indexed
         return wordCount

     def unindex_object(self, i):
         """ carefully unindex document with integer id 'i' from the text
         index and do not fail if it does not exist """

         index = self._index
         unindex = self._unindex
         val = unindex.get(i, None)
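This is the "text index merging" half of the commit: instead of wiping the reverse row and rebuilding from scratch, `index_object` now diffs the document's fresh word-id set against the stored one, removing only stale words and inserting only new or rescored ones. A runnable sketch of that merge pass over plain dicts (names are mine; the subtransaction threshold logic is omitted):

    def merge_document(forward, reverse, doc_id, new_scores):
        """Sketch of the merge in index_object: new_scores maps
        word id -> score for the freshly split text."""
        current = reverse.get(doc_id, [])

        # First deal with deleted words.
        for wid in current:
            if wid not in new_scores:
                forward.get(wid, {}).pop(doc_id, None)

        # Now the new/updated entries.
        for wid, score in new_scores.items():
            forward.setdefault(wid, {})[doc_id] = score

        reverse[doc_id] = list(new_scores)
        return len(new_scores)          # number of words indexed

    fwd, rev = {}, {}
    merge_document(fwd, rev, 42, {7: 2, 9: 1})
    merge_document(fwd, rev, 42, {7: 2})    # word 9 dropped on reindex
    assert 42 not in fwd[9] and fwd[7] == {42: 2}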
@@ -385,7 +438,7 @@ class UnTextIndex(Persistent, Implicit):
         if len(splitSource) == 1:
             splitSource = splitSource[0]
-            if splitSource[:1]=='"' and splitSource[-1:]=='"':
+            if splitSource[:1] == '"' and splitSource[-1:] == '"':
                 return self[splitSource]

             r = self._index.get(
@@ -429,13 +482,13 @@ class UnTextIndex(Persistent, Implicit):
             return None

         if type(keys) is StringType:
-            if not keys or not strip(keys):
+            if not keys or not string.strip(keys):
                 return None
             keys = [keys]

         r = None

         for key in keys:
-            key = strip(key)
+            key = string.strip(key)
             if not key:
                 continue
@@ -480,11 +533,11 @@ class UnTextIndex(Persistent, Implicit):

     def _subindex(self, isrc, d, old, last):

         src = self.getLexicon(self._lexicon).Splitter(isrc)

         for s in src:
-            if s[0] == '\"':
-                last = self.subindex(s[1:-1],d,old,last)
+            if s[0] == '\"':
+                last = self.subindex(s[1:-1], d, old, last)
             else:
                 if old(s):
                     if s != last:
                         d[s] = d[s] + 1
@@ -493,15 +546,12 @@ class UnTextIndex(Persistent, Implicit):

         return last

-    def query(self, s, default_operator=Or, ws=(string.whitespace,)):
-        """
-        This is called by TextIndexes.  A 'query term' which is a string
-        's' is passed in, along with an index object.  s is parsed, then
-        the wildcards are parsed, then something is parsed again, then the
-        whole thing is 'evaluated'
-        """
+    def query(self, s, default_operator=Or, ws=(string.whitespace,)):
+        """ This is called by TextIndexes.  A 'query term' which is a
+        string 's' is passed in, along with an index object.  s is
+        parsed, then the wildcards are parsed, then something is
+        parsed again, then the whole thing is 'evaluated'. """

         # First replace any occurrences of " and not " with " andnot "
         s = ts_regex.gsub(
             '[%s]+[aA][nN][dD][%s]*[nN][oO][tT][%s]+' % (ws * 3),
@@ -523,7 +573,8 @@ class UnTextIndex(Persistent, Implicit):

     def get_operands(self, q, i):
-        '''Evaluate and return the left and right operands for an operator'''
+        """Evaluate and return the left and right operands for an operator"""
+
         try:
             left  = q[i - 1]
             right = q[i + 1]
@@ -550,7 +601,7 @@ class UnTextIndex(Persistent, Implicit):

     def evaluate(self, query):
-        '''Evaluate a parsed query'''
+        """Evaluate a parsed query"""
         # There are two options if the query passed in is only one
         # item.  It means either it's an embedded query, in which case
         # we'll recursively evaluate, otherwise it's nothing for us
@@ -602,7 +653,7 @@ class UnTextIndex(Persistent, Implicit):

 def parse(s):
-    '''Parse parentheses and quotes'''
+    """Parse parentheses and quotes"""
     l = []
     tmp = string.lower(s)
@@ -625,10 +676,10 @@ def parse(s):
     return l

 def parse2(q, default_operator,
            operator_dict={AndNot: AndNot, And: And, Or: Or, Near: Near}):
-    '''Find operators and operands'''
+    """Find operators and operands"""
     i = 0
     isop = operator_dict.has_key
     while (i < len(q)):
         if (type(q[i]) is ListType):
             q[i] = parse2(q[i], default_operator)
@@ -646,9 +697,9 @@ def parse2(q, default_operator,
     return q

-def parens(s, parens_re = regex.compile('(\|)').search):
+def parens(s, parens_re=regex.compile('(\|)').search):

-    index=open_index=paren_count = 0
+    index = open_index = paren_count = 0

     while 1:
         index = parens_re(s, index)
@@ -672,7 +723,7 @@ def parens(s, parens_re = regex.compile('(\|)').search):

-def quotes(s, ws = (string.whitespace,)):
+def quotes(s, ws=(string.whitespace,)):
     # split up quoted regions
     splitted = ts_regex.split(s, '[%s]*\"[%s]*' % (ws * 2))
     split = string.split
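The `query` docstring undersells the pipeline: normalize " and not " to " andnot ", tokenize with `quotes`/`parens`, let `parse2` fold in the default operator, then `evaluate` walks the tree. The first step, rewritten as a sketch with the modern `re` module in place of `ts_regex.gsub` (an approximation of the original pattern, which allows optional whitespace between "and" and "not"):

    import re

    def normalize_andnot(s):
        """Sketch of query()'s first step: ' and not ' -> ' andnot '."""
        return re.sub(r'\s+and\s*not\s+', ' andnot ', s,
                      flags=re.IGNORECASE)

    assert normalize_andnot('cats AND NOT dogs') == 'cats andnot dogs'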