Commit aee22894 authored by Tim Peters's avatar Tim Peters

Whitespace normalization.

parent d0584c06
......@@ -17,9 +17,9 @@ class HTMLWordSplitter:
splat = []
for t in text:
splat += self._split(t)
return splat
return splat
def _split(self, text):
def _split(self, text):
text = text.lower()
remove = ["<[^>]*>",
"&[A-Za-z]+;",
......
......@@ -59,7 +59,7 @@ class Lexicon:
if wid is not None:
wids.append(wid)
return wids
def get_word(self, wid):
    """Return the word for the given word id.

    The id is looked up in ``self._words``; whatever that container
    raises for an unknown id propagates to the caller.
    """
    return self._words[wid]
......
"""Rice coding (a variant of Golomb coding)
Based on a Java implementation by Glen McCluskey described in a Usenix
;login: article at
;login: article at
http://www.usenix.org/publications/login/2000-4/features/java.html
McCluskey's article explains the approach as follows. The encoding
......@@ -33,7 +33,7 @@ class BitArray:
def __getitem__(self, i):
byte, offset = divmod(i, 8)
mask = 2 ** offset
mask = 2 ** offset
if self.bytes[byte] & mask:
return 1
else:
......@@ -41,12 +41,12 @@ class BitArray:
def __setitem__(self, i, val):
    """Set bit ``i`` when ``val`` is true, clear it otherwise.

    Bit ``i`` is stored in ``self.bytes[i // 8]`` at bit position
    ``i % 8``, counting from the least-significant bit.
    """
    byte, offset = divmod(i, 8)
    mask = 1 << offset
    if val:
        self.bytes[byte] |= mask
    else:
        # ~mask has every bit set except the target one.
        self.bytes[byte] &= ~mask
def __len__(self):
    """Return the value of ``self.nbits`` as the length of the array."""
    return self.nbits
......@@ -78,7 +78,7 @@ class RiceCode:
def init(self, m):
    """Record the parameter ``m`` and the two bit masks derived from it.

    ``self.lower`` has the low ``m`` bits set; ``self.mask`` has only
    bit ``m - 1`` set.  ``m`` must be at least 1, because ``1 << (m - 1)``
    raises ValueError for a negative shift count.
    """
    self.m = m
    self.lower = (1 << m) - 1
    self.mask = 1 << (m - 1)
def append(self, val):
......@@ -123,7 +123,7 @@ class RiceCode:
def tostring(self):
    """Return a binary string containing the encoded data.

    The binary string may contain some extra zeros at the end.
    The result is whatever ``self.bits.tostring()`` returns.
    """
    return self.bits.tostring()
......
......@@ -36,25 +36,25 @@ from Products.ZCTextIndex.QueryParser import QueryParser
class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
"""Persistent TextIndex"""
__implements__ = PluggableIndexInterface
meta_type = 'ZCTextIndex'
manage_options= (
{'label': 'Settings', 'action': 'manage_main'},
)
query_options = ['query']
def __init__(self, id, extra, caller, index_factory=Index):
self.id = id
self._fieldname = extra.doc_attr
lexicon = getattr(caller, extra.lexicon_id, None)
if lexicon is None:
raise LookupError, 'Lexicon "%s" not found' % extra.lexicon_id
if not ILexicon.isImplementedBy(lexicon):
raise ValueError, \
'Object "%s" does not implement lexicon interface' \
......@@ -63,7 +63,7 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
self.lexicon = lexicon
self.index = index_factory(self.lexicon)
self.parser = QueryParser()
## Pluggable Index APIs ##
def index_object(self, docid, obj, threshold=None):
......@@ -78,7 +78,7 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
def _apply_index(self, request, cid=''):
"""Apply query specified by request, a mapping containing the query.
Returns two objects on success, the resultSet containing the
matching record numbers and a tuple containing the names of
the fields used
......@@ -86,7 +86,7 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
Returns None if request is not valid for this index.
"""
record = parseIndexRequest(request, self.id, self.query_options)
if record.keys is None:
if record.keys is None:
return None
query_str = ' '.join(record.keys)
tree = self.parser.parseQuery(query_str)
......@@ -100,11 +100,11 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
chooser = NBest(nbest)
chooser.addmany(results.items())
return chooser.getbest()
def numObjects(self):
    """Return the number of objects indexed.

    Delegates to ``self.index.length()``.
    """
    return self.index.length()
def getEntryForObject(self, documentId, default=None):
"""Return the list of words indexed for documentId"""
try:
......@@ -113,28 +113,28 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
return default
get_word = self.lexicon.get_word
return [get_word(wid) for wid in word_ids]
def clear(self):
    """Reinitialize the index.

    Replaces ``self.index`` with a fresh ``Index`` built on the current
    lexicon.
    """
    # NOTE(review): __init__ accepts an index_factory argument, but this
    # always rebuilds with Index -- confirm that is intended.
    self.index = Index(self.lexicon)
def _get_object_text(self, obj):
    """Return the value of the indexed attribute of ``obj``.

    The attribute name is ``self._fieldname``.  If the attribute is
    callable it is called with no arguments and its result is returned;
    otherwise the attribute value itself is returned.  A missing
    attribute raises AttributeError (no default is given to getattr).
    """
    x = getattr(obj, self._fieldname)
    if callable(x):
        return x()
    return x
## User Interface Methods ##
manage_main = DTMLFile('dtml/manageZCTextIndex', globals())
InitializeClass(ZCTextIndex)
def manage_addZCTextIndex(self, id, extra=None, REQUEST=None,
                          RESPONSE=None):
    """Add a text index.

    Forwards to ``self.manage_addIndex`` with the index type
    'ZCTextIndex' and returns its result.  The last argument passed on is
    ``REQUEST.URL3``, or None when no REQUEST is supplied.
    """
    # REQUEST defaults to None, so only read URL3 when a request exists;
    # otherwise a call without a request would raise AttributeError.
    if REQUEST is None:
        url3 = None
    else:
        url3 = REQUEST.URL3
    return self.manage_addIndex(id, 'ZCTextIndex', extra,
                                REQUEST, RESPONSE, url3)
manage_addZCTextIndexForm = DTMLFile('dtml/addZCTextIndex', globals())
......@@ -155,17 +155,15 @@ def manage_addLexicon(self, id, title, splitter=None, normalizer=None,
self._setObject(id, lexicon)
if REQUEST is not None:
return self.manage_main(self, REQUEST, update_menu=1)
class PLexicon(Lexicon, Persistent, Acquisition.Implicit, SimpleItem):
    """Persistent Lexicon for ZCTextIndex"""

    meta_type = 'ZCTextIndex Lexicon'

    def __init__(self, id, title='', *pipeline):
        """Store ``id`` and ``title`` as strings and forward ``pipeline``.

        The remaining positional arguments are passed unchanged to the
        inherited ``__init__``.
        """
        self.id = str(id)
        self.title = str(title)
        # The inherited __init__ is looked up via inheritedAttribute
        # rather than by naming a base class directly.
        PLexicon.inheritedAttribute('__init__')(self, *pipeline)
InitializeClass(PLexicon)
......@@ -122,5 +122,5 @@ if __name__ == "__main__":
filename, = args
else:
filename = "profile.dat"
main(filename, annotate_p)
......@@ -70,13 +70,13 @@ if __name__ == "__main__":
print msg
print __doc__
sys.exit(2)
for o, v in opts:
if o == '-v':
VERBOSE += 1
if o == '-f':
FSPATH = v
if len(args) != 1:
print "Expected on argument"
print __doc__
......@@ -91,5 +91,4 @@ if __name__ == "__main__":
print dir
main(db, rt, dir)
cn.close()
fs.close()
fs.close()
......@@ -74,7 +74,7 @@ class TestQueryParser(TestCase):
self.expect('"foo bar"', PhraseNode("foo bar"))
self.expect("foo bar", AndNode([AtomNode("foo"), AtomNode("bar")]))
self.expect('(("foo bar"))"', PhraseNode("foo bar"))
self.expect("((foo bar))", AndNode([AtomNode("foo"), AtomNode("bar")]))
......
......@@ -10,11 +10,11 @@ import unittest
class Indexable:
    """Test helper: an object with a single ``text`` attribute."""

    def __init__(self, text):
        self.text = text
class LexiconHolder:
    """Test helper: an object exposing a ``lexicon`` attribute."""

    def __init__(self, lexicon):
        self.lexicon = lexicon
class Extra:
    """Test helper: an empty class used as a plain attribute container."""
    pass
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment