Commit b3048e12 authored by Tres Seaver's avatar Tres Seaver

 - Land new pluggable index types, specialized for date values and
   date ranges, from branch.
parent ab261465
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
from DateTime.DateTime import DateTime
from Products.PluginIndexes import PluggableIndex
from Products.PluginIndexes.common.UnIndex import UnIndex
from Products.PluginIndexes.common.util import parseIndexRequest
from types import StringType, FloatType, IntType
from Globals import DTMLFile
from BTrees.IOBTree import IOBTree
from BTrees.OIBTree import OIBTree
from BTrees.IIBTree import IISet, union, intersection
_marker = []
class DateIndex(UnIndex):
    """Pluggable index for date values.

    Both the indexed values and the query values are normalized by
    '_convert' into integers with a granularity of one minute.  Objects
    whose indexed value cannot be converted (including 'None') are
    omitted from the index.
    """

    __implements__ = (PluggableIndex.PluggableIndexInterface,)

    meta_type = 'DateIndex'

    # Option names handed to 'parseIndexRequest' in '_apply_index'.
    query_options = ['query', 'range']

    manage = manage_main = DTMLFile( 'dtml/manageDateIndex', globals() )
    manage_main._setName( 'manage_main' )

    manage_options = ( { 'label' : 'Settings'
                       , 'action' : 'manage_main'
                       },
                     )

    def clear( self ):
        """Complete reset: replace both mappings with fresh, empty BTrees."""
        self._index = IOBTree()
        self._unindex = OIBTree()

    def index_object( self, documentId, obj, threshold=None ):
        """Index an object, normalizing the indexed value to an integer.

        o Normalized value has granularity of one minute.

        o Objects which have 'None' as indexed value are *omitted*,
          by design.

        'documentId' is the key under which 'obj' is recorded; the value
        is read from the attribute of 'obj' named by 'self.id' (and
        called first if it is callable).  'threshold' is accepted but not
        used here.

        Returns 1 if the index was changed, 0 otherwise.
        """
        returnStatus = 0

        try:
            date_attr = getattr( obj, self.id )
            if callable( date_attr ):
                date_attr = date_attr()

            ConvertedDate = self._convert( value=date_attr, default=_marker )
        except AttributeError:
            # A missing attribute is treated like an unconvertible value.
            ConvertedDate = _marker

        oldConvertedDate = self._unindex.get( documentId, _marker )

        if ConvertedDate != oldConvertedDate:

            if oldConvertedDate is not _marker:
                self.removeForwardIndexEntry( oldConvertedDate, documentId )

                if ConvertedDate is _marker:
                    # The object no longer has an indexable value:  drop
                    # the reverse entry too, so that a stale value is not
                    # reported (or removed a second time) later on.
                    del self._unindex[documentId]

            if ConvertedDate is not _marker:
                self.insertForwardIndexEntry( ConvertedDate, documentId )
                self._unindex[documentId] = ConvertedDate

            returnStatus = 1

        return returnStatus

    def _apply_index( self, request, cid='', type=type ):
        """Apply the index to query parameters given in the argument

        Normalize the 'query' arguments into integer values at minute
        precision before querying.

        Returns 'None' when the parsed request carries no keys for this
        index;  otherwise returns a two-tuple of the matching document
        id set and '( self.id, )'.

        Raises 'RuntimeError' if the requested operator is not one of
        'self.operators'.

        The historical 'None=None' default-argument binding was dropped
        from the signature:  rebinding 'None' is a syntax error on
        Python 2.4 and later.
        """
        record = parseIndexRequest( request, self.id, self.query_options )
        if record.keys is None:
            return None

        # Built as a real list because it may be scanned more than once
        # (both 'min' and 'max' below).
        keys = [ self._convert( key ) for key in record.keys ]

        index = self._index
        r = None
        opr = None

        # experimental code for specifying the operator
        operator = record.get( 'operator', self.useOperator )
        if operator not in self.operators:
            raise RuntimeError( "operator not valid: %s" % operator )

        # depending on the operator we use intersection or union
        if operator == "or":
            set_func = union
        else:
            set_func = intersection

        # range parameter
        range_arg = record.get( 'range', None )
        if range_arg:
            opr = "range"
            opr_args = []
            if range_arg.find( "min" ) > -1:
                opr_args.append( "min" )
            if range_arg.find( "max" ) > -1:
                opr_args.append( "max" )

        if record.get( 'usage', None ):
            # see if any usage params are sent to field;  when present
            # they override whatever the 'range' option selected above
            opr = record.usage.lower().split( ':' )
            opr, opr_args = opr[0], opr[1:]

        if opr == "range":   # range search
            if 'min' in opr_args:
                lo = min( keys )
            else:
                lo = None

            if 'max' in opr_args:
                hi = max( keys )
            else:
                hi = None

            # Compare against None explicitly so that an upper bound of
            # zero is still honoured as a bound.
            if hi is not None:
                setlist = index.items( lo, hi )
            else:
                setlist = index.items( lo )

            # XXX: Use multiunion!
            for k, docset in setlist:
                if type( docset ) is IntType:
                    # a bare integer entry is wrapped into a one-element set
                    docset = IISet( ( docset, ) )
                r = set_func( r, docset )

        else:   # not a range search
            for key in keys:
                docset = index.get( key, None )
                if docset is not None:
                    if type( docset ) is IntType:
                        docset = IISet( ( docset, ) )
                    r = set_func( r, docset )

        if type( r ) is IntType:
            r = IISet( ( r, ) )

        if r is None:
            return IISet(), ( self.id, )
        else:
            return r, ( self.id, )

    def _convert( self, value, default=None ):
        """Convert Date/Time value to our internal representation

        o 'DateTime' instances are decomposed with 'parts()'.

        o Floats are decomposed with 'time.gmtime'.

        o Strings are parsed by constructing a 'DateTime' from them.

        o Any other value yields 'default'.

        The result is an integer built from year, month, day, hour and
        minute;  seconds are discarded.
        """
        # Imported locally:  'time' is needed for the float branch and is
        # not imported at the top of this module.
        import time

        if isinstance( value, DateTime ):
            t_tup = value.parts()
        elif type( value ) is FloatType:
            t_tup = time.gmtime( value )
        elif type( value ) is StringType:
            t_obj = DateTime( value )
            t_tup = t_obj.parts()
        else:
            return default

        yr = t_tup[0]
        mo = t_tup[1]
        dy = t_tup[2]
        hr = t_tup[3]
        mn = t_tup[4]

        # Every month is weighted as 31 days, so the value is a sortable
        # key rather than a true count of elapsed minutes.
        t_val = ( ( ( ( yr * 12 + mo ) * 31 + dy ) * 24 + hr ) * 60 + mn )

        return t_val
# Form object built from the 'dtml/addDateIndex' template; presumably the
# "add" form shown before 'manage_addDateIndex' is invoked -- confirm
# against the product registration.
manage_addDateIndexForm = DTMLFile( 'dtml/addDateIndex', globals() )
def manage_addDateIndex( self, id, REQUEST=None, RESPONSE=None, URL3=None):
    """Add a Date index

    Delegates to 'self.manage_addIndex' with the fixed type name
    'DateIndex' and no extra settings;  'URL3' is forwarded under the
    keyword 'URL1'.
    """
    forwarded = { 'extra'    : None
                , 'REQUEST'  : REQUEST
                , 'RESPONSE' : RESPONSE
                , 'URL1'     : URL3
                }
    return self.manage_addIndex( id, 'DateIndex', **forwarded )
DateIndex README
Overview
Normal FieldIndexes *can* be used to index values which are DateTime
instances, but they are hideously expensive:
o DateTime instances are *huge*, both in RAM and on disk.
o DateTime instances maintain an absurd amount of precision, far
beyond any reasonable search criteria for "normal" cases.
DateIndex is a pluggable index which addresses these two issues
as follows:
o It normalizes the indexed value to an integer representation
with a granularity of one minute.
o It normalizes the 'query' value into the same form.
o Objects whose indexed value is 'None' are omitted from
the index.
<dtml-var manage_page_header>
<dtml-var "manage_form_title(this(), _,
form_title='Add DateIndex',
)">
<p class="form-help">
A <em>DateIndex</em> indexes DateTime attributes.
</p>
<form action="manage_addDateIndex" method="post" enctype="multipart/form-data">
<table cellspacing="0" cellpadding="2" border="0">
<tr>
<td align="left" valign="top">
<div class="form-label">
Id
</div>
</td>
<td align="left" valign="top">
<input type="text" name="id" size="40" />
</td>
</tr>
<tr>
<td align="left" valign="top">
<div class="form-optional">
Type
</div>
</td>
<td align="left" valign="top">
DateIndex
</td>
</tr>
<tr>
<td align="left" valign="top">
</td>
<td align="left" valign="top">
<div class="form-element">
<input class="form-element" type="submit" name="submit"
value=" Add " />
</div>
</td>
</tr>
</table>
</form>
<dtml-var manage_page_footer>
<dtml-var manage_page_header>
<dtml-var manage_tabs>
<p class="form-help">
Nothing to manage at this time.
</p>
<dtml-var manage_page_footer>
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
import Zope
import unittest
from DateTime import DateTime
from Products.PluginIndexes.DateIndex.DateIndex import DateIndex
class Dummy:
    """Minimal stand-in for an indexable object with a name and a date."""

    def __init__(self, name, date):
        self._name, self._date = name, date

    def name(self):
        """Return the name given at construction."""
        return self._name

    def date(self):
        """Return the date given at construction (may be None)."""
        return self._date

    def __str__(self):
        date_text = str(self._date)
        return "<Dummy %s, date %s>" % (self._name, date_text)
class DI_Tests(unittest.TestCase):
    """Exercise DateIndex both empty and populated with a fixed data set."""

    def setUp(self):
        first = '2002-05-08 15:16:17'
        middle = '2032-05-08 15:16:17'
        last = '2062-05-08 15:16:17'

        self._index = DateIndex('date')

        # (document id, object) pairs: id 0 carries no date, and ids 4
        # and 5 carry the same date.
        self._values = (
            (0, Dummy('a', None)),
            (1, Dummy('b', DateTime(0))),
            (2, Dummy('c', DateTime(first))),
            (3, Dummy('d', DateTime(middle))),
            (4, Dummy('e', DateTime(last))),
            (5, Dummy('e', DateTime(last))),
        )

        # Requests used by the tests below.
        self._noop_req = {'bar': 123}
        self._request = {'date': DateTime(0)}
        self._min_req = {'date': DateTime(middle),
                         'date_usage': 'range:min'}
        self._max_req = {'date': DateTime(middle),
                         'date_usage': 'range:max'}
        self._range_req = {'date': (DateTime(first), DateTime(last)),
                           'date_usage': 'range:min:max'}
        self._zero_req = {'date': 0}
        self._none_req = {'date': None}

    def _populateIndex( self ):
        """Index every fixture object under its document id."""
        for docid, obj in self._values:
            self._index.index_object(docid, obj)

    def _checkApply(self, req, expectedValues):
        """Apply 'req' and check the hits against 'expectedValues'.

        'expectedValues' is a sequence of (document id, object) pairs;
        only the ids are compared.
        """
        result, used = self._index._apply_index(req)
        if hasattr(result, 'keys'):
            result = result.keys()

        self.failUnlessEqual(used, ('date',))

        mismatch = '%s | %s' % (list(result), expectedValues)
        self.failUnlessEqual(len(result), len(expectedValues), mismatch)

        for docid, obj in expectedValues:
            self.failUnless(docid in result)

    def _convert(self, date):
        """Recompute the value the index is expected to store for 'date'."""
        fields = date.parts()[:5]
        yr, mo, dy, hr, mn = fields
        return (((yr * 12 + mo) * 31 + dy) * 24 + hr) * 60 + mn

    def test_empty(self):
        index = self._index

        self.failUnlessEqual(len(index), 0)
        self.failUnlessEqual(len(index.referencedObjects()), 0)

        self.failUnless(index.getEntryForObject(1234) is None)
        sentinel = []
        self.failUnless(index.getEntryForObject(1234, sentinel) is sentinel)
        index.unindex_object(1234) # shouldn't throw

        self.failUnless(index.hasUniqueValuesFor('date'))
        self.failIf(index.hasUniqueValuesFor('foo'))
        self.failUnlessEqual(len(index.uniqueValues('date')), 0)

        self.failUnless(index._apply_index({'zed': 12345}) is None)

        # Every kind of query must come back empty.
        for req in (self._request, self._min_req,
                    self._max_req, self._range_req):
            self._checkApply(req, [])

    def test_retrieval( self ):
        self._populateIndex()
        values = self._values
        index = self._index
        total = len(values)

        self.failUnlessEqual(len(index), total - 2) # One dupe, one empty
        self.failUnlessEqual(len(index.referencedObjects()), total - 1)
                                                    # One empty

        self.failUnless(index.getEntryForObject(1234) is None)
        sentinel = []
        self.failUnless(index.getEntryForObject(1234, sentinel) is sentinel)
        index.unindex_object(1234) # shouldn't throw

        for docid, obj in values:
            if not obj.date():
                continue
            self.failUnlessEqual(self._index.getEntryForObject(docid),
                                 self._convert(obj.date()))

        self.failUnlessEqual(len(index.uniqueValues('date')), total - 2)
        self.failUnless(index._apply_index(self._noop_req) is None)

        self._checkApply(self._request, values[1:2])
        self._checkApply(self._min_req, values[3:])
        self._checkApply(self._max_req, values[1:4])
        self._checkApply(self._range_req, values[2:] )
def test_suite():
    """Return a suite holding every test case defined by DI_Tests."""
    cases = unittest.makeSuite( DI_Tests )
    return unittest.TestSuite( ( cases, ) )
def run():
    """Run the module's suite with the plain-text runner."""
    runner = unittest.TextTestRunner()
    runner.run( test_suite() )

# Allow the module to be executed directly as a script.
if __name__ == '__main__':
    run()
DateRangeIndex README
Overview
Zope applications frequently wish to perform efficient queries
against a pair of date attributes/methods, representing a time
interval (e.g., effective / expiration dates). This query *can*
be done using a pair of indexes, but this implementation is
hideously expensive:
o DateTime instances are *huge*, both in RAM and on disk.
o DateTime instances maintain an absurd amount of precision, far
beyond any reasonable search criteria for "normal" cases.
o Results must be fetched and intersected between two indexes.
o Handling objects which do not specify both endpoints (i.e.,
where the interval is open or half-open) is iffy, as the
default value needs to be coerced into a different abnormal
value for each end to permit ordered comparison.
o The *very* common case of the open interval (neither endpoint
specified) should be optimized.
DateRangeIndex is a pluggable index which addresses these issues
as follows:
o It groups the "open" case into a special set, '_always'.
o It maintains separate ordered sets for each of the "half-open"
cases.
o It performs the expensive "intersect two range search" operation
only on the (usually small) set of objects which provide a
closed interval.
o It flattens the key values into integers with granularity of
one minute.
o It normalizes the 'query' value into the same form.
<dtml-var manage_page_header>
<dtml-var "manage_form_title(this(), _,
form_title='Add DateRangeIndex')">
<p class="form-help">
A DateRangeIndex takes the name of two input attributes; one containing the
start date of the range, the second the end of the range. This index is filled
with range information based on those two markers. You can then search for
objects where a given date falls within the range.
</p>
<form action="addDateRangeIndex" method="POST">
<table cellspacing="0" cellpadding="2" border="0">
<tr>
<td align="left" valign="top">
<div class="form-label">
Id
</div>
</td>
<td align="left" valign="top">
<input type="text" name="id" size="40" />
</td>
</tr>
<tr>
<td align="left" valign="top">
<div class="form-label">
Since field
</div>
</td>
<td align="left" valign="top">
<input type="text" name="extra.since_field:record" size="40" />
</td>
</tr>
<tr>
<td align="left" valign="top">
<div class="form-label">
Until field
</div>
</td>
<td align="left" valign="top">
<input type="text" name="extra.until_field:record" size="40" />
</td>
</tr>
<tr>
<td align="left" valign="top">
</td>
<td align="left" valign="top">
<div class="form-element">
<input class="form-element" type="submit" name="submit"
value=" Add " />
</div>
</td>
</tr>
</table>
</form>
<dtml-var manage_page_footer>
<dtml-var manage_page_header>
<dtml-var manage_tabs>
<p class="form-help">
You can update this DateRangeIndex by editing the following fields and clicking
<em>Update</em>.
</p>
<form action="&dtml-URL1;/manage_edit" method="POST">
<table cellpadding="2" cellspacing="0" border="0">
<tr>
<td align="left" valign="top">
<div class="form-label">
Since field
</div>
</td>
<td align="left" valign="top">
<input name="since_field" value="&dtml-getSinceField;" size="40" />
</td>
</tr>
<tr>
<td align="left" valign="top">
<div class="form-label">
Until field
</div>
</td>
<td align="left" valign="top">
<input name="until_field" value="&dtml-getUntilField;" />
</td>
</tr>
<tr>
<td></td>
<td align="left" valign="top">
<div class="form-element">
<input class="form-element" type="submit" name="submit"
value="Update">
</div>
</td>
</tr>
</table>
</form>
<dtml-var manage_page_footer>
##############################################################################
#
# Copyright (c) 2001 Zope Corporation and Contributors. All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
import Zope
import unittest
from Products.PluginIndexes.DateRangeIndex.DateRangeIndex import DateRangeIndex
class Dummy:
    """Test double carrying a name and an optional start / stop pair."""

    def __init__( self, name, start, stop ):
        self._name, self._start, self._stop = name, start, stop

    def name( self ):
        """Return the name given at construction."""
        return self._name

    def start( self ):
        """Return the lower bound, or None when it was not given."""
        return self._start

    def stop( self ):
        """Return the upper bound, or None when it was not given."""
        return self._stop

    def datum( self ):
        """Return the bounds as a ( start, stop ) tuple."""
        bounds = ( self._start, self._stop )
        return bounds
# Fixture objects: two with neither bound, two with only a start, two
# with only a stop, and two with both bounds.
dummies = [
    Dummy( 'a', None, None ),
    Dummy( 'b', None, None ),
    Dummy( 'c', 0, None ),
    Dummy( 'd', 10, None ),
    Dummy( 'e', None, 4 ),
    Dummy( 'f', None, 11 ),
    Dummy( 'g', 0, 11 ),
    Dummy( 'h', 2, 9 ),
]
def matchingDummies( value ):
    """Return the fixture dummies whose interval contains 'value'.

    A bound of None places no restriction on that side;  both bounds
    are inclusive.
    """
    selected = []
    for candidate in dummies:
        since = candidate.start()
        if since is not None and not ( since <= value ):
            continue
        until = candidate.stop()
        if until is not None and not ( until >= value ):
            continue
        selected.append( candidate )
    return selected
class DRI_Tests( unittest.TestCase ):
    """Exercise DateRangeIndex both empty and populated with 'dummies'.

    Checks go through the TestCase 'failUnless*' helpers rather than
    bare 'assert' statements, so they still run when Python is started
    with '-O'.
    """

    def setUp( self ):
        pass

    def tearDown( self ):
        pass

    def test_empty( self ):
        """An index with no content answers every lookup with nothing."""
        empty = DateRangeIndex( 'empty' )

        self.failUnless( empty.getEntryForObject( 1234 ) is None )
        empty.unindex_object( 1234 ) # shouldn't throw

        self.failIf( empty.uniqueValues( 'foo' ) )
        self.failIf( empty.uniqueValues( 'foo', 1 ) )

        # A request that does not mention the index is not handled at all.
        self.failUnless( empty._apply_index( { 'zed' : 12345 } ) is None )

        result, used = empty._apply_index( { 'empty' : 12345 } )

        self.failIf( result )
        self.failUnlessEqual( used, ( None, None ) )

    def test_retrieval( self ):
        """Query results agree with a brute-force scan of the fixtures."""
        work = DateRangeIndex( 'work', 'start', 'stop' )

        for i in range( len( dummies ) ):
            work.index_object( i, dummies[i] )

        for i in range( len( dummies ) ):
            self.failUnlessEqual( work.getEntryForObject( i )
                                , dummies[i].datum()
                                )

        for value in range( -1, 15 ):

            matches = matchingDummies( value )
            results, used = work._apply_index( { 'work' : value } )

            self.failUnlessEqual( used, ( 'start', 'stop' ) )

            self.failUnlessEqual( len( matches ), len( results )
                                , '%s: %s == %s'
                                % ( value
                                  , map( lambda x: x.name(), matches )
                                  , results
                                  )
                                )

            matches.sort( lambda x, y: cmp( x.name(), y.name() ) )

            # 'map( None, ... )' pairs the two sequences positionally.
            for result, match in map( None, results, matches ):
                self.failUnlessEqual( work.getEntryForObject( result )
                                    , match.datum()
                                    )
def test_suite():
    """Return a suite holding every test case defined by DRI_Tests."""
    cases = unittest.makeSuite( DRI_Tests )
    return unittest.TestSuite( ( cases, ) )
def run():
    """Run the module's suite with the plain-text runner."""
    runner = unittest.TextTestRunner()
    runner.run( test_suite() )

# Allow the module to be executed directly as a script.
if __name__ == '__main__':
    run()
......@@ -20,8 +20,17 @@ import TextIndex.TextIndex
import FieldIndex.FieldIndex
import KeywordIndex.KeywordIndex
import TopicIndex.TopicIndex
_indexes = ('TextIndex','KeywordIndex','FieldIndex','PathIndex','TopicIndex')
import DateIndex.DateIndex
import DateRangeIndex.DateRangeIndex
_indexes = ('TextIndex',
'KeywordIndex',
'FieldIndex',
'PathIndex',
'TopicIndex',
'DateIndex',
'DateRangeIndex',
)
def initialize(context):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment