Commit aee22894 authored by Tim Peters

Whitespace normalization.

parent d0584c06
...@@ -17,9 +17,9 @@ class HTMLWordSplitter: ...@@ -17,9 +17,9 @@ class HTMLWordSplitter:
splat = [] splat = []
for t in text: for t in text:
splat += self._split(t) splat += self._split(t)
return splat return splat
def _split(self, text): def _split(self, text):
text = text.lower() text = text.lower()
remove = ["<[^>]*>", remove = ["<[^>]*>",
"&[A-Za-z]+;", "&[A-Za-z]+;",
......
...@@ -59,7 +59,7 @@ class Lexicon: ...@@ -59,7 +59,7 @@ class Lexicon:
if wid is not None: if wid is not None:
wids.append(wid) wids.append(wid)
return wids return wids
def get_word(self, wid): def get_word(self, wid):
"""Return the word for the given word id""" """Return the word for the given word id"""
return self._words[wid] return self._words[wid]
......
"""Rice coding (a variant of Golomb coding) """Rice coding (a variant of Golomb coding)
Based on a Java implementation by Glen McCluskey described in a Usenix Based on a Java implementation by Glen McCluskey described in a Usenix
;login: article at ;login: article at
http://www.usenix.org/publications/login/2000-4/features/java.html http://www.usenix.org/publications/login/2000-4/features/java.html
McCluskey's article explains the approach as follows. The encoding McCluskey's article explains the approach as follows. The encoding
...@@ -33,7 +33,7 @@ class BitArray: ...@@ -33,7 +33,7 @@ class BitArray:
def __getitem__(self, i): def __getitem__(self, i):
byte, offset = divmod(i, 8) byte, offset = divmod(i, 8)
mask = 2 ** offset mask = 2 ** offset
if self.bytes[byte] & mask: if self.bytes[byte] & mask:
return 1 return 1
else: else:
...@@ -41,12 +41,12 @@ class BitArray: ...@@ -41,12 +41,12 @@ class BitArray:
def __setitem__(self, i, val): def __setitem__(self, i, val):
byte, offset = divmod(i, 8) byte, offset = divmod(i, 8)
mask = 2 ** offset mask = 2 ** offset
if val: if val:
self.bytes[byte] |= mask self.bytes[byte] |= mask
else: else:
self.bytes[byte] &= ~mask self.bytes[byte] &= ~mask
def __len__(self): def __len__(self):
return self.nbits return self.nbits
...@@ -78,7 +78,7 @@ class RiceCode: ...@@ -78,7 +78,7 @@ class RiceCode:
def init(self, m): def init(self, m):
self.m = m self.m = m
self.lower = (1 << m) - 1 self.lower = (1 << m) - 1
self.mask = 1 << (m - 1) self.mask = 1 << (m - 1)
def append(self, val): def append(self, val):
...@@ -123,7 +123,7 @@ class RiceCode: ...@@ -123,7 +123,7 @@ class RiceCode:
def tostring(self): def tostring(self):
"""Return a binary string containing the encoded data. """Return a binary string containing the encoded data.
The binary string may contain some extra zeros at the end. The binary string may contain some extra zeros at the end.
""" """
return self.bits.tostring() return self.bits.tostring()
......
...@@ -36,25 +36,25 @@ from Products.ZCTextIndex.QueryParser import QueryParser ...@@ -36,25 +36,25 @@ from Products.ZCTextIndex.QueryParser import QueryParser
class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem): class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
"""Persistent TextIndex""" """Persistent TextIndex"""
__implements__ = PluggableIndexInterface __implements__ = PluggableIndexInterface
meta_type = 'ZCTextIndex' meta_type = 'ZCTextIndex'
manage_options= ( manage_options= (
{'label': 'Settings', 'action': 'manage_main'}, {'label': 'Settings', 'action': 'manage_main'},
) )
query_options = ['query'] query_options = ['query']
def __init__(self, id, extra, caller, index_factory=Index): def __init__(self, id, extra, caller, index_factory=Index):
self.id = id self.id = id
self._fieldname = extra.doc_attr self._fieldname = extra.doc_attr
lexicon = getattr(caller, extra.lexicon_id, None) lexicon = getattr(caller, extra.lexicon_id, None)
if lexicon is None: if lexicon is None:
raise LookupError, 'Lexicon "%s" not found' % extra.lexicon_id raise LookupError, 'Lexicon "%s" not found' % extra.lexicon_id
if not ILexicon.isImplementedBy(lexicon): if not ILexicon.isImplementedBy(lexicon):
raise ValueError, \ raise ValueError, \
'Object "%s" does not implement lexicon interface' \ 'Object "%s" does not implement lexicon interface' \
...@@ -63,7 +63,7 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem): ...@@ -63,7 +63,7 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
self.lexicon = lexicon self.lexicon = lexicon
self.index = index_factory(self.lexicon) self.index = index_factory(self.lexicon)
self.parser = QueryParser() self.parser = QueryParser()
## Pluggable Index APIs ## ## Pluggable Index APIs ##
def index_object(self, docid, obj, threshold=None): def index_object(self, docid, obj, threshold=None):
...@@ -78,7 +78,7 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem): ...@@ -78,7 +78,7 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
def _apply_index(self, request, cid=''): def _apply_index(self, request, cid=''):
"""Apply query specified by request, a mapping containing the query. """Apply query specified by request, a mapping containing the query.
Returns two objects on success, the resultSet containing the Returns two objects on success, the resultSet containing the
matching record numbers and a tuple containing the names of matching record numbers and a tuple containing the names of
the fields used the fields used
...@@ -86,7 +86,7 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem): ...@@ -86,7 +86,7 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
Returns None if request is not valid for this index. Returns None if request is not valid for this index.
""" """
record = parseIndexRequest(request, self.id, self.query_options) record = parseIndexRequest(request, self.id, self.query_options)
if record.keys is None: if record.keys is None:
return None return None
query_str = ' '.join(record.keys) query_str = ' '.join(record.keys)
tree = self.parser.parseQuery(query_str) tree = self.parser.parseQuery(query_str)
...@@ -100,11 +100,11 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem): ...@@ -100,11 +100,11 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
chooser = NBest(nbest) chooser = NBest(nbest)
chooser.addmany(results.items()) chooser.addmany(results.items())
return chooser.getbest() return chooser.getbest()
def numObjects(self): def numObjects(self):
"""Return number of object indexed""" """Return number of object indexed"""
return self.index.length() return self.index.length()
def getEntryForObject(self, documentId, default=None): def getEntryForObject(self, documentId, default=None):
"""Return the list of words indexed for documentId""" """Return the list of words indexed for documentId"""
try: try:
...@@ -113,28 +113,28 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem): ...@@ -113,28 +113,28 @@ class ZCTextIndex(Persistent, Acquisition.Implicit, SimpleItem):
return default return default
get_word = self.lexicon.get_word get_word = self.lexicon.get_word
return [get_word(wid) for wid in word_ids] return [get_word(wid) for wid in word_ids]
def clear(self): def clear(self):
"""reinitialize the index""" """reinitialize the index"""
self.index = Index(self.lexicon) self.index = Index(self.lexicon)
def _get_object_text(self, obj): def _get_object_text(self, obj):
x = getattr(obj, self._fieldname) x = getattr(obj, self._fieldname)
if callable(x): if callable(x):
return x() return x()
else: else:
return x return x
## User Interface Methods ## ## User Interface Methods ##
manage_main = DTMLFile('dtml/manageZCTextIndex', globals()) manage_main = DTMLFile('dtml/manageZCTextIndex', globals())
InitializeClass(ZCTextIndex) InitializeClass(ZCTextIndex)
def manage_addZCTextIndex(self, id, extra=None, REQUEST=None, def manage_addZCTextIndex(self, id, extra=None, REQUEST=None,
RESPONSE=None): RESPONSE=None):
"""Add a text index""" """Add a text index"""
return self.manage_addIndex(id, 'ZCTextIndex', extra, return self.manage_addIndex(id, 'ZCTextIndex', extra,
REQUEST, RESPONSE, REQUEST.URL3) REQUEST, RESPONSE, REQUEST.URL3)
manage_addZCTextIndexForm = DTMLFile('dtml/addZCTextIndex', globals()) manage_addZCTextIndexForm = DTMLFile('dtml/addZCTextIndex', globals())
...@@ -155,17 +155,15 @@ def manage_addLexicon(self, id, title, splitter=None, normalizer=None, ...@@ -155,17 +155,15 @@ def manage_addLexicon(self, id, title, splitter=None, normalizer=None,
self._setObject(id, lexicon) self._setObject(id, lexicon)
if REQUEST is not None: if REQUEST is not None:
return self.manage_main(self, REQUEST, update_menu=1) return self.manage_main(self, REQUEST, update_menu=1)
class PLexicon(Lexicon, Persistent, Acquisition.Implicit, SimpleItem): class PLexicon(Lexicon, Persistent, Acquisition.Implicit, SimpleItem):
"""Persistent Lexicon for ZCTextIndex""" """Persistent Lexicon for ZCTextIndex"""
meta_type = 'ZCTextIndex Lexicon' meta_type = 'ZCTextIndex Lexicon'
def __init__(self, id, title='', *pipeline): def __init__(self, id, title='', *pipeline):
self.id = str(id) self.id = str(id)
self.title = str(title) self.title = str(title)
PLexicon.inheritedAttribute('__init__')(self, *pipeline) PLexicon.inheritedAttribute('__init__')(self, *pipeline)
InitializeClass(PLexicon) InitializeClass(PLexicon)
...@@ -122,5 +122,5 @@ if __name__ == "__main__": ...@@ -122,5 +122,5 @@ if __name__ == "__main__":
filename, = args filename, = args
else: else:
filename = "profile.dat" filename = "profile.dat"
main(filename, annotate_p) main(filename, annotate_p)
...@@ -70,13 +70,13 @@ if __name__ == "__main__": ...@@ -70,13 +70,13 @@ if __name__ == "__main__":
print msg print msg
print __doc__ print __doc__
sys.exit(2) sys.exit(2)
for o, v in opts: for o, v in opts:
if o == '-v': if o == '-v':
VERBOSE += 1 VERBOSE += 1
if o == '-f': if o == '-f':
FSPATH = v FSPATH = v
if len(args) != 1: if len(args) != 1:
print "Expected on argument" print "Expected on argument"
print __doc__ print __doc__
...@@ -91,5 +91,4 @@ if __name__ == "__main__": ...@@ -91,5 +91,4 @@ if __name__ == "__main__":
print dir print dir
main(db, rt, dir) main(db, rt, dir)
cn.close() cn.close()
fs.close() fs.close()
...@@ -74,7 +74,7 @@ class TestQueryParser(TestCase): ...@@ -74,7 +74,7 @@ class TestQueryParser(TestCase):
self.expect('"foo bar"', PhraseNode("foo bar")) self.expect('"foo bar"', PhraseNode("foo bar"))
self.expect("foo bar", AndNode([AtomNode("foo"), AtomNode("bar")])) self.expect("foo bar", AndNode([AtomNode("foo"), AtomNode("bar")]))
self.expect('(("foo bar"))"', PhraseNode("foo bar")) self.expect('(("foo bar"))"', PhraseNode("foo bar"))
self.expect("((foo bar))", AndNode([AtomNode("foo"), AtomNode("bar")])) self.expect("((foo bar))", AndNode([AtomNode("foo"), AtomNode("bar")]))
......
...@@ -10,11 +10,11 @@ import unittest ...@@ -10,11 +10,11 @@ import unittest
class Indexable: class Indexable:
def __init__(self, text): def __init__(self, text):
self.text = text self.text = text
class LexiconHolder: class LexiconHolder:
def __init__(self, lexicon): def __init__(self, lexicon):
self.lexicon = lexicon self.lexicon = lexicon
class Extra: class Extra:
pass pass
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment