Commit bd203ff0 authored by Yoshinori Okuji

Fix some bugs. Add more tests and a profiler.

git-svn-id: https://svn.erp5.org/repos/neo/branches/prototype3@76 71dcc9de-d417-0410-9af5-da40c76e7ee4
parent cb72a2c6
##############################################################################
#
# Copyright (c) 2005 Nexedi SARL and Contributors. All Rights Reserved.
#                    Yoshinori Okuji <yo@nexedi.com>
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsability of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# garantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################

"""
Multi-Queue Cache Algorithm.
"""
from math import log


class Element(object):
    """
    This class defines an element of a FIFO buffer.
    """
    pass


class FIFO(object):
    """
    This class implements a FIFO buffer.
    """

    def __init__(self):
        self._head = None
        self._tail = None
        self._len = 0

    def __len__(self):
        return self._len

    def append(self):
        # Create a new element, link it at the tail and return it.
        element = Element()
        element.next = None
        element.prev = self._tail
        if self._tail is not None:
            self._tail.next = element
        self._tail = element
        if self._head is None:
            self._head = element
        self._len += 1
        return element

    def head(self):
        return self._head

    def tail(self):
        return self._tail

    def shift(self):
        # Remove the head element and return it, or None if the buffer is empty.
        element = self._head
        if element is None:
            return None
        del self[element]
        return element

    def __delitem__(self, element):
        # Unlink the given element from the buffer.
        if element.next is None:
            self._tail = element.prev
        else:
            element.next.prev = element.prev
        if element.prev is None:
            self._head = element.next
        else:
            element.prev.next = element.next
        self._len -= 1


class Data(object):
    """
    Data for each element in a FIFO buffer.
    """
    pass

class MQ(object):
    """
    This class manages cached data by a variant of Multi-Queue.

    This class caches various sizes of objects. Here are some considerations:

    - Expired objects are not really deleted immediately. But if GC is invoked
      too often, it degrades the performance significantly.

    - If large objects are cached, the number of cached objects decreases. This
      might affect the cache hit ratio. It might be better to tweak a buffer
      level according to the size of an object.

    - Stored values must be strings.

    - The size calculation is not accurate.
    """

    def __init__(self, life_time=10000, buffer_levels=9, max_history_size=100000,
                 max_size=20*1024*1024):
        self._history_buffer = FIFO()
        self._cache_buffers = []
        for level in range(buffer_levels):
            self._cache_buffers.append(FIFO())
        self._data = {}
        self._time = 0
        self._life_time = life_time
        self._buffer_levels = buffer_levels
        self._max_history_size = max_history_size
        self._max_size = max_size
        self._size = 0

    def has_key(self, id):
        if id in self._data:
            data = self._data[id]
            if data.level >= 0:
                return 1
        return 0
    __contains__ = has_key

    def fetch(self, id):
        """
        Fetch a value associated with the id.
        """
        data = self._data[id]
        if data.level >= 0:
            value = data.value
            self._size -= len(value) # XXX inaccurate
            # Re-store the value so that the access counter is incremented and
            # the entry is refreshed (possibly promoted to a higher level).
            self.store(id, value)
            return value
        raise KeyError(id)

    __getitem__ = fetch

    def get(self, id, d=None):
        try:
            return self.fetch(id)
        except KeyError:
            return d

    def _evict(self, id):
        """
        Evict an element to the history buffer.
        """
        data = self._data[id]
        self._size -= len(data.value) # XXX inaccurate
        del self._cache_buffers[data.level][data.element]
        element = self._history_buffer.append()
        data.level = -1
        data.element = element
        delattr(data, 'value')
        delattr(data, 'expire_time')
        element.data = data
        if len(self._history_buffer) > self._max_history_size:
            element = self._history_buffer.shift()
            del self._data[element.data.id]

    def store(self, id, value):
        cache_buffers = self._cache_buffers

        try:
            data = self._data[id]
            level, element, counter = data.level, data.element, data.counter + 1
            if level >= 0:
                del cache_buffers[level][element]
            else:
                del self._history_buffer[element]
        except KeyError:
            counter = 1

        # XXX It might be better to adjust the level according to the object size.
        level = min(int(log(counter, 2)), self._buffer_levels - 1)
        element = cache_buffers[level].append()
        data = Data()
        data.id = id
        data.expire_time = self._time + self._life_time
        data.level = level
        data.element = element
        data.value = value
        data.counter = counter
        element.data = data
        self._data[id] = data
        self._size += len(value) # XXX inaccurate

        self._time += 1

        # Expire old elements.
        time = self._time
        for level in xrange(self._buffer_levels):
            cache_buffer = cache_buffers[level]
            head = cache_buffer.head()
            if head is not None and head.data.expire_time < time:
                del cache_buffer[head]
                data = head.data
                if level > 0:
                    # Demote the expired element to the next lower level.
                    new_level = level - 1
                    element = cache_buffers[new_level].append()
                    element.data = data
                    data.expire_time = time + self._life_time
                    data.level = new_level
                    data.element = element
                else:
                    # Expired level-0 elements move to the history buffer.
                    self._evict(data.id)

        # Limit the size.
        size = self._size
        max_size = self._max_size
        if size > max_size:
            for cache_buffer in cache_buffers:
                while size > max_size:
                    element = cache_buffer.shift()
                    if element is None:
                        break
                    data = element.data
                    del self._data[data.id]
                    size -= len(data.value) # XXX inaccurate
                if size <= max_size:
                    break
            self._size = size

    __setitem__ = store

    def invalidate(self, id):
        if id in self._data:
            data = self._data[id]
            if data.level >= 0:
                del self._cache_buffers[data.level][data.element]
                self._evict(id)
                return
        raise KeyError, "%s was not found in the cache" % id

    __delitem__ = invalidate

# Here is a test.
if __name__ == '__main__':
    import hotshot, hotshot.stats

    def test():
        cache = MQ(life_time=100, buffer_levels=9, max_history_size=10000,
                   max_size=2*1024*1024)

        # Nothing is cached yet.
        for i in xrange(10000):
            assert cache.get(i) is None, '%d should not be present' % i

        # Every stored value must be readable right away.
        for i in xrange(10000):
            cache[i] = str(i)
            assert cache.get(i) == str(i), '%d does not exist' % i

        # All but roughly the most recent entries must have expired by now.
        for i in xrange(10000 - 100 - 1):
            assert cache.get(i) is None, '%d should not be present' % i

        # Keep the first 10 entries hot while churning through many others;
        # frequently accessed entries must survive in the cache.
        for i in xrange(10):
            cache[i] = str(i)
        for j in xrange(1000):
            for i in xrange(10):
                assert cache.get(i) == str(i), '%d does not exist' % i
            for i in xrange(10, 500):
                cache[i] = str(i)
        for i in xrange(10):
            assert cache.get(i) == str(i), '%d does not exist' % i

    prof = hotshot.Profile("mq.prof")
    prof.runcall(test)
    prof.close()

    stats = hotshot.stats.load("mq.prof")
    stats.strip_dirs()
    stats.sort_stats('time', 'calls')
    stats.print_stats(20)
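
For readers skimming the commit, the sketch below shows how the cache above is driven through its public API (store/fetch/get/invalidate and their operator aliases). It is illustrative only and not part of the committed file; the module name mq and the sample keys are assumptions.

    from mq import MQ  # assumed module name for the file above

    cache = MQ(life_time=1000, buffer_levels=9,
               max_history_size=10000, max_size=1024*1024)

    cache['oid-1'] = 'pickled state'           # __setitem__ is an alias of store()
    assert 'oid-1' in cache                    # __contains__ is an alias of has_key()
    assert cache['oid-1'] == 'pickled state'   # __getitem__ is an alias of fetch()
    assert cache.get('missing') is None        # get() turns KeyError into a default

    del cache['oid-1']                         # __delitem__ is an alias of invalidate()
    assert cache.get('oid-1') is None          # invalidated entries only stay in the history buffer

The test section profiles with hotshot, which exists only in Python 2. A roughly equivalent run with the standard cProfile/pstats modules (an alternative, not what the commit uses) would swap the hotshot lines inside the same `if __name__ == '__main__':` block for:

    import cProfile, pstats

    profiler = cProfile.Profile()
    profiler.runcall(test)                     # the test() defined above
    profiler.dump_stats('mq.prof')

    stats = pstats.Stats('mq.prof')
    stats.strip_dirs()
    stats.sort_stats('time', 'calls')
    stats.print_stats(20)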